From 78327acd4cdc4a1601af718b781eece577b6b7d4 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:35 +0800 Subject: [PATCH 0001/2290] ext4: regenerate buddy after block freeing failed if under fc replay [ Upstream commit c9b528c35795b711331ed36dc3dbee90d5812d4e ] This mostly reverts commit 6bd97bf273bd ("ext4: remove redundant mb_regenerate_buddy()") and reintroduces mb_regenerate_buddy(). Based on code in mb_free_blocks(), fast commit replay can end up marking as free blocks that are already marked as such. This causes corruption of the buddy bitmap so we need to regenerate it in that case. Reported-by: Jan Kara Fixes: 6bd97bf273bd ("ext4: remove redundant mb_regenerate_buddy()") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c1515daf1def1..40903c172a34f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1118,6 +1118,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1796,6 +1814,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ext4_mark_group_bitmap_corrupted( sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); + } else { + mb_regenerate_buddy(e4b); } goto done; } -- GitLab From a1d7ca71ba1a91eeec2f34e3d3825a8847740a8c Mon Sep 17 00:00:00 2001 From: Guanhua Gao Date: Thu, 18 Jan 2024 11:29:16 -0500 Subject: [PATCH 0002/2290] dmaengine: fsl-dpaa2-qdma: Fix the size of dma pools [ Upstream commit b73e43dcd7a8be26880ef8ff336053b29e79dbc5 ] In case of long format of qDMA command descriptor, there are one frame descriptor, three entries in the frame list and two data entries. So the size of dma_pool_create for these three fields should be the same with the total size of entries respectively, or the contents may be overwritten by the next allocated descriptor. Fixes: 7fdf9b05c73b ("dmaengine: fsl-dpaa2-qdma: Add NXP dpaa2 qDMA controller driver for Layerscape SoCs") Signed-off-by: Guanhua Gao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240118162917.2951450-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 8dd40d00a672a..6b829d347417a 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan) if (!dpaa2_chan->fd_pool) goto err; - dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev, - sizeof(struct dpaa2_fl_entry), - sizeof(struct dpaa2_fl_entry), 0); + dpaa2_chan->fl_pool = + dma_pool_create("fl_pool", dev, + sizeof(struct dpaa2_fl_entry) * 3, + sizeof(struct dpaa2_fl_entry), 0); + if (!dpaa2_chan->fl_pool) goto err_fd; dpaa2_chan->sdd_pool = dma_pool_create("sdd_pool", dev, - sizeof(struct dpaa2_qdma_sd_d), + sizeof(struct dpaa2_qdma_sd_d) * 2, sizeof(struct dpaa2_qdma_sd_d), 0); if (!dpaa2_chan->sdd_pool) goto err_fl; -- GitLab From 908939b8e81a9ceaa34af6eae3d28b35e6697ee2 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Wed, 3 Jan 2024 14:37:55 +0530 Subject: [PATCH 0003/2290] dmaengine: ti: k3-udma: Report short packet errors [ Upstream commit bc9847c9ba134cfe3398011e343dcf6588c1c902 ] Propagate the TR response status to the device using BCDMA split-channels. For example CSI-RX driver should be able to check if a frame was not transferred completely (short packet) and needs to be discarded. Fixes: 25dcb5dd7b7c ("dmaengine: ti: New driver for K3 UDMA") Signed-off-by: Jai Luthra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240103-tr_resp_err-v1-1-2fdf6d48ab92@ti.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ti/k3-udma.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b86b809eb1f7e..82e7acfda6ed0 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -3963,6 +3963,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, { struct udma_chan *uc = to_udma_chan(&vc->chan); struct udma_desc *d; + u8 status; if (!vd) return; @@ -3972,12 +3973,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, if (d->metadata_size) udma_fetch_epib(uc, d); - /* Provide residue information for the client */ if (result) { void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx); if (cppi5_desc_get_type(desc_vaddr) == CPPI5_INFO0_DESC_TYPE_VAL_HOST) { + /* Provide residue information for the client */ result->residue = d->residue - cppi5_hdesc_get_pktlen(desc_vaddr); if (result->residue) @@ -3986,7 +3987,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, result->result = DMA_TRANS_NOERROR; } else { result->residue = 0; - result->result = DMA_TRANS_NOERROR; + /* Propagate TR Response errors to the client */ + status = d->hwdesc[0].tr_resp_base->status; + if (status) + result->result = DMA_TRANS_ABORTED; + else + result->result = DMA_TRANS_NOERROR; } } } -- GitLab From 13535ae766550b9d5e1fd09134a6085c36035d40 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:03 +0100 Subject: [PATCH 0004/2290] dmaengine: fsl-qdma: Fix a memory leak related to the status queue DMA [ Upstream commit 968bc1d7203d384e72afe34124a1801b7af76514 ] This dma_alloc_coherent() is undone in the remove function, but not in the error handling path of fsl_qdma_probe(). Switch to the managed version to fix the issue in the probe and simplify the remove function. Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/a0ef5d0f5a47381617ef339df776ddc68ce48173.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/fsl-qdma.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 045ead46ec8fc..5cc887acb05b2 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -563,11 +563,11 @@ static struct fsl_qdma_queue /* * Buffer for queue command */ - status_head->cq = dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - status_size, - &status_head->bus_addr, - GFP_KERNEL); + status_head->cq = dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + status_size, + &status_head->bus_addr, + GFP_KERNEL); if (!status_head->cq) { devm_kfree(&pdev->dev, status_head); return NULL; @@ -1272,8 +1272,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev) static int fsl_qdma_remove(struct platform_device *pdev) { - int i; - struct fsl_qdma_queue *status; struct device_node *np = pdev->dev.of_node; struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev); @@ -1282,11 +1280,6 @@ static int fsl_qdma_remove(struct platform_device *pdev) of_dma_controller_free(np); dma_async_device_unregister(&fsl_qdma->dma_dev); - for (i = 0; i < fsl_qdma->block_number; i++) { - status = fsl_qdma->status[i]; - dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) * - status->n_cq, status->cq, status->bus_addr); - } return 0; } -- GitLab From 25ab4d72eb7cbfa0f3d97a139a9b2bfcaa72dd59 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:04 +0100 Subject: [PATCH 0005/2290] dmaengine: fsl-qdma: Fix a memory leak related to the queue command DMA [ Upstream commit 3aa58cb51318e329d203857f7a191678e60bb714 ] This dma_alloc_coherent() is undone neither in the remove function, nor in the error handling path of fsl_qdma_probe(). Switch to the managed version to fix both issues. Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/7f66aa14f59d32b13672dde28602b47deb294e1f.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/fsl-qdma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 5cc887acb05b2..69385f32e2756 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -514,11 +514,11 @@ static struct fsl_qdma_queue queue_temp = queue_head + i + (j * queue_num); queue_temp->cq = - dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - queue_size[i], - &queue_temp->bus_addr, - GFP_KERNEL); + dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + queue_size[i], + &queue_temp->bus_addr, + GFP_KERNEL); if (!queue_temp->cq) return NULL; queue_temp->block_base = fsl_qdma->block_base + -- GitLab From ed3bb52a05d3975b77b143576ef109eb595f080b Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 5 Jan 2024 18:37:03 +0900 Subject: [PATCH 0006/2290] phy: renesas: rcar-gen3-usb2: Fix returning wrong error code [ Upstream commit 249abaf3bf0dd07f5ddebbb2fe2e8f4d675f074e ] Even if device_create_file() returns error code, rcar_gen3_phy_usb2_probe() will return zero because the "ret" is variable shadowing. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202312161021.gOLDl48K-lkp@intel.com/ Fixes: 441a681b8843 ("phy: rcar-gen3-usb2: fix implementation for runtime PM") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240105093703.3359949-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 9de617ca9daa2..7e61c6b278a74 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -675,8 +675,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { - int ret; - channel->is_otg_channel = true; channel->uses_otg_pins = !of_property_read_bool(dev->of_node, "renesas,no-otg-pins"); @@ -740,8 +738,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) ret = PTR_ERR(provider); goto error; } else if (channel->is_otg_channel) { - int ret; - ret = device_create_file(dev, &dev_attr_role); if (ret < 0) goto error; -- GitLab From 296fb308f4f822bdee1d07fd67a86d1003d2922b Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 23 Jan 2024 12:28:41 -0500 Subject: [PATCH 0007/2290] dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV [ Upstream commit a22fe1d6dec7e98535b97249fdc95c2be79120bb ] is_slave_direction() should return true when direction is DMA_DEV_TO_DEV. Fixes: 49920bc66984 ("dmaengine: add new enum dma_transfer_direction") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240123172842.3764529-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- include/linux/dmaengine.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c923f4e60f240..3576c6e89fea4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -954,7 +954,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( -- GitLab From 0430bfcd46657d9116a26cd377f112cbc40826a4 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 28 Jan 2024 14:05:54 +0200 Subject: [PATCH 0008/2290] phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP [ Upstream commit 7104ba0f1958adb250319e68a15eff89ec4fd36d ] If the external phy working together with phy-omap-usb2 does not implement send_srp(), we may still attempt to call it. This can happen on an idle Ethernet gadget triggering a wakeup for example: configfs-gadget.g1 gadget.0: ECM Suspend configfs-gadget.g1 gadget.0: Port suspended. Triggering wakeup ... Unable to handle kernel NULL pointer dereference at virtual address 00000000 when execute ... PC is at 0x0 LR is at musb_gadget_wakeup+0x1d4/0x254 [musb_hdrc] ... musb_gadget_wakeup [musb_hdrc] from usb_gadget_wakeup+0x1c/0x3c [udc_core] usb_gadget_wakeup [udc_core] from eth_start_xmit+0x3b0/0x3d4 [u_ether] eth_start_xmit [u_ether] from dev_hard_start_xmit+0x94/0x24c dev_hard_start_xmit from sch_direct_xmit+0x104/0x2e4 sch_direct_xmit from __dev_queue_xmit+0x334/0xd88 __dev_queue_xmit from arp_solicit+0xf0/0x268 arp_solicit from neigh_probe+0x54/0x7c neigh_probe from __neigh_event_send+0x22c/0x47c __neigh_event_send from neigh_resolve_output+0x14c/0x1c0 neigh_resolve_output from ip_finish_output2+0x1c8/0x628 ip_finish_output2 from ip_send_skb+0x40/0xd8 ip_send_skb from udp_send_skb+0x124/0x340 udp_send_skb from udp_sendmsg+0x780/0x984 udp_sendmsg from __sys_sendto+0xd8/0x158 __sys_sendto from ret_fast_syscall+0x0/0x58 Let's fix the issue by checking for send_srp() and set_vbus() before calling them. For USB peripheral only cases these both could be NULL. Fixes: 657b306a7bdf ("usb: phy: add a new driver for omap usb2 phy") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20240128120556.8848-1-tony@atomide.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/ti/phy-omap-usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index 31a775877f6e3..63c45809943ff 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -116,7 +116,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->set_vbus) return -ENODEV; return phy->comparator->set_vbus(phy->comparator, enabled); @@ -126,7 +126,7 @@ static int omap_usb_start_srp(struct usb_otg *otg) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->start_srp) return -ENODEV; return phy->comparator->start_srp(phy->comparator); -- GitLab From cbc53148cc0946b72d62a3c53870cb22ce4ec284 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:29 +0000 Subject: [PATCH 0009/2290] cifs: failure to add channel on iface should bump up weight [ Upstream commit 6aac002bcfd554aff6d3ebb55e1660d078d70ab0 ] After the interface selection policy change to do a weighted round robin, each iface maintains a weight_fulfilled. When the weight_fulfilled reaches the total weight for the iface, we know that the weights can be reset and ifaces can be allocated from scratch again. During channel allocation failures on a particular channel, weight_fulfilled is not incremented. If a few interfaces are inactive, we could end up in a situation where the active interfaces are all allocated for the total_weight, and inactive ones are all that remain. This can cause a situation where no more channels can be allocated further. This change fixes it by increasing weight_fulfilled, even when channel allocation failure happens. This could mean that if there are temporary failures in channel allocation, the iface weights may not strictly be adhered to. But that's still okay. Fixes: a6d8fb54a515 ("cifs: distribute channels across interfaces based on speed") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/sess.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 634035bcb9347..b8e14bcd2c68d 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -248,6 +248,8 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) &iface->sockaddr, rc); kref_put(&iface->refcount, release_iface); + /* failure to add chan should increase weight */ + iface->weight_fulfilled++; continue; } -- GitLab From d2b7e247f3688cb4b69368ad93de9af734f4ecdc Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 10 Jan 2024 12:18:51 -0800 Subject: [PATCH 0010/2290] drm/msms/dp: fixed link clock divider bits be over written in BPC unknown case [ Upstream commit 77e8aad5519e04f6c1e132aaec1c5f8faf41844f ] Since the value of DP_TEST_BIT_DEPTH_8 is already left shifted, in the BPC unknown case, the additional shift causes spill over to the other bits of the [DP_CONFIGURATION_CTRL] register. Fix this by changing the return value of dp_link_get_test_bits_depth() in the BPC unknown case to (DP_TEST_BIT_DEPTH_8 >> DP_TEST_BIT_DEPTH_SHIFT). Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/573989/ Link: https://lore.kernel.org/r/1704917931-30133-1-git-send-email-quic_khsieh@quicinc.com [quic_abhinavk@quicinc.com: fix minor checkpatch warning to align with opening braces] Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 5 ----- drivers/gpu/drm/msm/dp/dp_link.c | 10 +++++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 103eef9f059a0..b20701893e5b3 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -133,11 +133,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) tbd = dp_link_get_test_bits_depth(ctrl->link, ctrl->panel->dp_mode.bpp); - if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) { - pr_debug("BIT_DEPTH not set. Configure default\n"); - tbd = DP_TEST_BIT_DEPTH_8; - } - config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT; /* Num of Lanes */ diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index cb66d1126ea96..3c7884c85f61e 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -1172,6 +1172,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) { u32 tbd; + struct dp_link_private *link; + + link = container_of(dp_link, struct dp_link_private, dp_link); /* * Few simplistic rules and assumptions made here: @@ -1189,12 +1192,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) tbd = DP_TEST_BIT_DEPTH_10; break; default: - tbd = DP_TEST_BIT_DEPTH_UNKNOWN; + drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n", + bpp); + tbd = DP_TEST_BIT_DEPTH_8; break; } - if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN) - tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); + tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); return tbd; } -- GitLab From 42939a1ea6d428582a0762d79e6b51f5bd12d4b3 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 17 Jan 2024 13:13:30 -0800 Subject: [PATCH 0011/2290] drm/msm/dp: return correct Colorimetry for DP_TEST_DYNAMIC_RANGE_CEA case [ Upstream commit fcccdafd91f8bdde568b86ff70848cf83f029add ] MSA MISC0 bit 1 to 7 contains Colorimetry Indicator Field. dp_link_get_colorimetry_config() returns wrong colorimetry value in the DP_TEST_DYNAMIC_RANGE_CEA case in the current implementation. Hence fix this problem by having dp_link_get_colorimetry_config() return defined CEA RGB colorimetry value in the case of DP_TEST_DYNAMIC_RANGE_CEA. Changes in V2: -- drop retrieving colorimetry from colorspace -- drop dr = link->dp_link.test_video.test_dyn_range assignment Changes in V3: -- move defined MISCr0a Colorimetry vale to dp_reg.h -- rewording commit title -- rewording commit text to more precise describe this patch Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/574888/ Link: https://lore.kernel.org/r/1705526010-597-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_link.c | 12 +++++++----- drivers/gpu/drm/msm/dp/dp_reg.h | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 3c7884c85f61e..ceb382fa56d5b 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -7,6 +7,7 @@ #include +#include "dp_reg.h" #include "dp_link.h" #include "dp_panel.h" @@ -1075,7 +1076,7 @@ int dp_link_process_request(struct dp_link *dp_link) int dp_link_get_colorimetry_config(struct dp_link *dp_link) { - u32 cc; + u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB; struct dp_link_private *link; if (!dp_link) { @@ -1089,10 +1090,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link) * Unless a video pattern CTS test is ongoing, use RGB_VESA * Only RGB_VESA and RGB_CEA supported for now */ - if (dp_link_is_video_pattern_requested(link)) - cc = link->dp_link.test_video.test_dyn_range; - else - cc = DP_TEST_DYNAMIC_RANGE_VESA; + if (dp_link_is_video_pattern_requested(link)) { + if (link->dp_link.test_video.test_dyn_range & + DP_TEST_DYNAMIC_RANGE_CEA) + cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB; + } return cc; } diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index 268602803d9a3..176a503ece9c0 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -129,6 +129,9 @@ #define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001) #define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005) +#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0) +#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04) + #define REG_DP_VALID_BOUNDARY (0x00000030) #define REG_DP_VALID_BOUNDARY_2 (0x00000034) -- GitLab From fb8bfc6ea3cd8c5ac3d35711d064e2f6646aec17 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 17 Jan 2024 11:41:09 -0800 Subject: [PATCH 0012/2290] drm/msm/dpu: check for valid hw_pp in dpu_encoder_helper_phys_cleanup [ Upstream commit 7f3d03c48b1eb6bc45ab20ca98b8b11be25f9f52 ] The commit 8b45a26f2ba9 ("drm/msm/dpu: reserve cdm blocks for writeback in case of YUV output") introduced a smatch warning about another conditional block in dpu_encoder_helper_phys_cleanup() which had assumed hw_pp will always be valid which may not necessarily be true. Lets fix the other conditional block by making sure hw_pp is valid before dereferencing it. Reported-by: Dan Carpenter Fixes: ae4d721ce100 ("drm/msm/dpu: add an API to reset the encoder related hw blocks") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/574878/ Link: https://lore.kernel.org/r/20240117194109.21609-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 38d38f923df64..25245ef386db6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2053,7 +2053,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } /* reset the merge 3D HW block */ - if (phys_enc->hw_pp->merge_3d) { + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, BLEND_3D_NONE); if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d) @@ -2069,7 +2069,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) if (phys_enc->hw_wb) intf_cfg.wb = phys_enc->hw_wb->idx; - if (phys_enc->hw_pp->merge_3d) + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; if (ctl->ops.reset_intf_cfg) -- GitLab From e42ff0844fe418c7d03a14f9f90e1b91ba119591 Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Wed, 31 Jan 2024 10:08:28 +0800 Subject: [PATCH 0013/2290] net: stmmac: xgmac: fix handling of DPP safety error for DMA channels [ Upstream commit 46eba193d04f8bd717e525eb4110f3c46c12aec3 ] Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core") checks and reports safety errors, but leaves the Data Path Parity Errors for each channel in DMA unhandled at all, lead to a storm of interrupt. Fix it by checking and clearing the DMA_DPP_Interrupt_Status register. Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core") Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 + .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 57 ++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 54bb072aeb2d3..c11d626856247 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -209,6 +209,7 @@ struct stmmac_safety_stats { unsigned long mac_errors[32]; unsigned long mtl_errors[32]; unsigned long dma_errors[32]; + unsigned long dma_dpp_errors[32]; }; /* Number of fields in Safety Stats */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 880a75bf2eb1f..e67a880ebf645 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -282,6 +282,8 @@ #define XGMAC_RXCEIE BIT(4) #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc +#define XGMAC_MTL_DPP_CONTROL 0x000010e0 +#define XGMAC_DDPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 @@ -364,6 +366,7 @@ #define XGMAC_DCEIE BIT(1) #define XGMAC_TCEIE BIT(0) #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c +#define XGMAC_DMA_DPP_INT_STATUS 0x00003074 #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index c2181c277291b..c24cd019460a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -789,6 +789,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error"; +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error"; +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { + { true, "TDPES0", dpp_tx_err }, + { true, "TDPES1", dpp_tx_err }, + { true, "TDPES2", dpp_tx_err }, + { true, "TDPES3", dpp_tx_err }, + { true, "TDPES4", dpp_tx_err }, + { true, "TDPES5", dpp_tx_err }, + { true, "TDPES6", dpp_tx_err }, + { true, "TDPES7", dpp_tx_err }, + { true, "TDPES8", dpp_tx_err }, + { true, "TDPES9", dpp_tx_err }, + { true, "TDPES10", dpp_tx_err }, + { true, "TDPES11", dpp_tx_err }, + { true, "TDPES12", dpp_tx_err }, + { true, "TDPES13", dpp_tx_err }, + { true, "TDPES14", dpp_tx_err }, + { true, "TDPES15", dpp_tx_err }, + { true, "RDPES0", dpp_rx_err }, + { true, "RDPES1", dpp_rx_err }, + { true, "RDPES2", dpp_rx_err }, + { true, "RDPES3", dpp_rx_err }, + { true, "RDPES4", dpp_rx_err }, + { true, "RDPES5", dpp_rx_err }, + { true, "RDPES6", dpp_rx_err }, + { true, "RDPES7", dpp_rx_err }, + { true, "RDPES8", dpp_rx_err }, + { true, "RDPES9", dpp_rx_err }, + { true, "RDPES10", dpp_rx_err }, + { true, "RDPES11", dpp_rx_err }, + { true, "RDPES12", dpp_rx_err }, + { true, "RDPES13", dpp_rx_err }, + { true, "RDPES14", dpp_rx_err }, + { true, "RDPES15", dpp_rx_err }, +}; + static void dwxgmac3_handle_dma_err(struct net_device *ndev, void __iomem *ioaddr, bool correctable, struct stmmac_safety_stats *stats) @@ -800,6 +837,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev, dwxgmac3_log_error(ndev, value, correctable, "DMA", dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats); + + value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS); + writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS); + + dwxgmac3_log_error(ndev, value, false, "DMA_DPP", + dwxgmac3_dma_dpp_errors, + STAT_OFF(dma_dpp_errors), stats); } static int @@ -838,6 +882,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */ writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL); + /* 5. Enable Data Path Parity Protection */ + value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); + /* already enabled by default, explicit enable it again */ + value &= ~XGMAC_DDPP_DISABLE; + writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); + return 0; } @@ -871,7 +921,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev, ret |= !corr; } - err = dma & (XGMAC_DEUIS | XGMAC_DECIS); + /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in + * DMA_Safety_Interrupt_Status, so we handle DMA Data Path + * Parity Errors here + */ + err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS); corr = dma & XGMAC_DECIS; if (err) { dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats); @@ -887,6 +941,7 @@ static const struct dwxgmac3_error { { dwxgmac3_mac_errors }, { dwxgmac3_mtl_errors }, { dwxgmac3_dma_errors }, + { dwxgmac3_dma_dpp_errors }, }; static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats, -- GitLab From bcabbf8ab5c61c67608e984c5368387c3b8d9b94 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:48:56 +0100 Subject: [PATCH 0014/2290] wifi: mac80211: fix waiting for beacons logic [ Upstream commit a0b4f2291319c5d47ecb196b90400814fdcfd126 ] This should be waiting if we don't have a beacon yet, but somehow I managed to invert the logic. Fix that. Fixes: 74e1309acedc ("wifi: mac80211: mlme: look up beacon elems only if needed") Link: https://msgid.link/20240131164856.922701229546.I239b379e7cee04608e73c016b737a5245e5b23dd@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c07645c999f9a..c6f0da028a2a4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7221,8 +7221,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); - - if (beacon_ies) { + if (!beacon_ies) { /* * Wait up to one beacon interval ... * should this be more if we miss one? -- GitLab From 0193e0660cc6689c794794b471492923cfd7bfbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2024 17:53:24 +0000 Subject: [PATCH 0015/2290] netdevsim: avoid potential loop in nsim_dev_trap_report_work() [ Upstream commit ba5e1272142d051dcc57ca1d3225ad8a089f9858 ] Many syzbot reports include the following trace [1] If nsim_dev_trap_report_work() can not grab the mutex, it should rearm itself at least one jiffie later. [1] Sending NMI from CPU 1 to CPUs 0: NMI backtrace for cpu 0 CPU: 0 PID: 32383 Comm: kworker/0:2 Not tainted 6.8.0-rc2-syzkaller-00031-g861c0981648f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Workqueue: events nsim_dev_trap_report_work RIP: 0010:bytes_is_nonzero mm/kasan/generic.c:89 [inline] RIP: 0010:memory_is_nonzero mm/kasan/generic.c:104 [inline] RIP: 0010:memory_is_poisoned_n mm/kasan/generic.c:129 [inline] RIP: 0010:memory_is_poisoned mm/kasan/generic.c:161 [inline] RIP: 0010:check_region_inline mm/kasan/generic.c:180 [inline] RIP: 0010:kasan_check_range+0x101/0x190 mm/kasan/generic.c:189 Code: 07 49 39 d1 75 0a 45 3a 11 b8 01 00 00 00 7c 0b 44 89 c2 e8 21 ed ff ff 83 f0 01 5b 5d 41 5c c3 48 85 d2 74 4f 48 01 ea eb 09 <48> 83 c0 01 48 39 d0 74 41 80 38 00 74 f2 eb b6 41 bc 08 00 00 00 RSP: 0018:ffffc90012dcf998 EFLAGS: 00000046 RAX: fffffbfff258af1e RBX: fffffbfff258af1f RCX: ffffffff8168eda3 RDX: fffffbfff258af1f RSI: 0000000000000004 RDI: ffffffff92c578f0 RBP: fffffbfff258af1e R08: 0000000000000000 R09: fffffbfff258af1e R10: ffffffff92c578f3 R11: ffffffff8acbcbc0 R12: 0000000000000002 R13: ffff88806db38400 R14: 1ffff920025b9f42 R15: ffffffff92c578e8 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000c00994e078 CR3: 000000002c250000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: instrument_atomic_read include/linux/instrumented.h:68 [inline] atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline] queued_spin_is_locked include/asm-generic/qspinlock.h:57 [inline] debug_spin_unlock kernel/locking/spinlock_debug.c:101 [inline] do_raw_spin_unlock+0x53/0x230 kernel/locking/spinlock_debug.c:141 __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:150 [inline] _raw_spin_unlock_irqrestore+0x22/0x70 kernel/locking/spinlock.c:194 debug_object_activate+0x349/0x540 lib/debugobjects.c:726 debug_work_activate kernel/workqueue.c:578 [inline] insert_work+0x30/0x230 kernel/workqueue.c:1650 __queue_work+0x62e/0x11d0 kernel/workqueue.c:1802 __queue_delayed_work+0x1bf/0x270 kernel/workqueue.c:1953 queue_delayed_work_on+0x106/0x130 kernel/workqueue.c:1989 queue_delayed_work include/linux/workqueue.h:563 [inline] schedule_delayed_work include/linux/workqueue.h:677 [inline] nsim_dev_trap_report_work+0x9c0/0xc80 drivers/net/netdevsim/dev.c:842 process_one_work+0x886/0x15d0 kernel/workqueue.c:2633 process_scheduled_works kernel/workqueue.c:2706 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2787 kthread+0x2c6/0x3a0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Fixes: 012ec02ae441 ("netdevsim: convert driver to use unlocked devlink API during init/fini") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201175324.3752746-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/netdevsim/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index c3fbdd6b68baf..f3fa4bd121169 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work) trap_report_dw.work); nsim_dev = nsim_trap_data->nsim_dev; - /* For each running port and enabled packet trap, generate a UDP - * packet with a random 5-tuple and report it. - */ if (!devl_trylock(priv_to_devlink(nsim_dev))) { - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0); + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1); return; } + /* For each running port and enabled packet trap, generate a UDP + * packet with a random 5-tuple and report it. + */ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { if (!netif_running(nsim_dev_port->ns->netdev)) continue; -- GitLab From 466ceebe48cbba3f4506f165fca7111f9eb8bb12 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 1 Feb 2024 10:47:51 +0100 Subject: [PATCH 0016/2290] net: atlantic: Fix DMA mapping for PTP hwts ring [ Upstream commit 2e7d3b67630dfd8f178c41fa2217aa00e79a5887 ] Function aq_ring_hwts_rx_alloc() maps extra AQ_CFG_RXDS_DEF bytes for PTP HWTS ring but then generic aq_ring_free() does not take this into account. Create and use a specific function to free HWTS ring to fix this issue. Trace: [ 215.351607] ------------[ cut here ]------------ [ 215.351612] DMA-API: atlantic 0000:4b:00.0: device driver frees DMA memory with different size [device address=0x00000000fbdd0000] [map size=34816 bytes] [unmap size=32768 bytes] [ 215.351635] WARNING: CPU: 33 PID: 10759 at kernel/dma/debug.c:988 check_unmap+0xa6f/0x2360 ... [ 215.581176] Call Trace: [ 215.583632] [ 215.585745] ? show_trace_log_lvl+0x1c4/0x2df [ 215.590114] ? show_trace_log_lvl+0x1c4/0x2df [ 215.594497] ? debug_dma_free_coherent+0x196/0x210 [ 215.599305] ? check_unmap+0xa6f/0x2360 [ 215.603147] ? __warn+0xca/0x1d0 [ 215.606391] ? check_unmap+0xa6f/0x2360 [ 215.610237] ? report_bug+0x1ef/0x370 [ 215.613921] ? handle_bug+0x3c/0x70 [ 215.617423] ? exc_invalid_op+0x14/0x50 [ 215.621269] ? asm_exc_invalid_op+0x16/0x20 [ 215.625480] ? check_unmap+0xa6f/0x2360 [ 215.629331] ? mark_lock.part.0+0xca/0xa40 [ 215.633445] debug_dma_free_coherent+0x196/0x210 [ 215.638079] ? __pfx_debug_dma_free_coherent+0x10/0x10 [ 215.643242] ? slab_free_freelist_hook+0x11d/0x1d0 [ 215.648060] dma_free_attrs+0x6d/0x130 [ 215.651834] aq_ring_free+0x193/0x290 [atlantic] [ 215.656487] aq_ptp_ring_free+0x67/0x110 [atlantic] ... [ 216.127540] ---[ end trace 6467e5964dd2640b ]--- [ 216.132160] DMA-API: Mapped at: [ 216.132162] debug_dma_alloc_coherent+0x66/0x2f0 [ 216.132165] dma_alloc_attrs+0xf5/0x1b0 [ 216.132168] aq_ring_hwts_rx_alloc+0x150/0x1f0 [atlantic] [ 216.132193] aq_ptp_ring_alloc+0x1bb/0x540 [atlantic] [ 216.132213] aq_nic_init+0x4a1/0x760 [atlantic] Fixes: 94ad94558b0f ("net: aquantia: add PTP rings infrastructure") Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201094752.883026-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 +++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index abd4832e4ed21..5acb3e16b5677 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -993,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) return 0; err_exit_hwts_rx: - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); err_exit_ptp_rx: aq_ring_free(&aq_ptp->ptp_rx); err_exit_ptp_tx: @@ -1011,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) aq_ring_free(&aq_ptp->ptp_tx); aq_ring_free(&aq_ptp->ptp_rx); - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); aq_ptp_skb_ring_release(&aq_ptp->skb_ring); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 9c314fe14ab62..0eaaba3a18ee0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -919,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self) } } +void aq_ring_hwts_rx_free(struct aq_ring_s *self) +{ + if (!self) + return; + + if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size + AQ_CFG_RXDS_DEF, + self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } +} + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) { unsigned int count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 52847310740a2..d627ace850ff5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -210,6 +210,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self); int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, unsigned int idx, unsigned int size, unsigned int dx_size); +void aq_ring_hwts_rx_free(struct aq_ring_s *self); void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); -- GitLab From 7f484179c53a75944427e164035398c319a29c77 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 1 Feb 2024 19:42:38 +0100 Subject: [PATCH 0017/2290] selftests: net: cut more slack for gro fwd tests. [ Upstream commit cb9f4a30fb85e1f4f149ada595a67899adb3db19 ] The udpgro_fwd.sh self-tests are somewhat unstable. There are a few timing constraints the we struggle to meet on very slow environments. Instead of skipping the whole tests in such envs, increase the test resilience WRT very slow hosts: increase the inter-packets timeouts, avoid resetting the counters every second and finally disable reduce the background traffic noise. Tested with: for I in $(seq 1 100); do ./tools/testing/selftests/kselftest_install/run_kselftest.sh \ -t net:udpgro_fwd.sh || exit -1 done in a slow environment. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Link: https://lore.kernel.org/r/f4b6b11064a0d39182a9ae6a853abae3e9b4426a.1706812005.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/udpgro_fwd.sh | 14 ++++++++++++-- tools/testing/selftests/net/udpgso_bench_rx.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565add392..9690a5d7ffd7d 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -37,6 +37,10 @@ create_ns() { for ns in $NS_SRC $NS_DST; do ip netns add $ns ip -n $ns link set dev lo up + + # disable route solicitations to decrease 'noise' traffic + ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0 done ip link add name veth$SRC type veth peer name veth$DST @@ -78,6 +82,12 @@ create_vxlan_pair() { create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad done + + # preload neighbur cache, do avoid some noisy traffic + local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]') + local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]') + ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC + ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST } is_ipv6() { @@ -117,7 +127,7 @@ run_test() { # not enable GRO ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789 ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 - ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & + ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args & local spid=$! sleep 0.1 ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst @@ -166,7 +176,7 @@ run_bench() { # bind the sender and the receiver to different CPUs to try # get reproducible results ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" - ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & + ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 & local spid=$! sleep 0.1 ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index f35a924d4a303..1cbadd267c963 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -375,7 +375,7 @@ static void do_recv(void) do_flush_udp(fd); tnow = gettimeofday_ms(); - if (tnow > treport) { + if (!cfg_expected_pkt_nr && tnow > treport) { if (packets) fprintf(stderr, "%s rx: %6lu MB/s %8lu calls/s\n", -- GitLab From 90fe47743a2baaf57cce22eb958fb48cb726b0e3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 1 Feb 2024 19:42:41 +0100 Subject: [PATCH 0018/2290] selftests: net: avoid just another constant wait [ Upstream commit 691bb4e49c98a47bc643dd808453136ce78b15b4 ] Using hard-coded constant timeout to wait for some expected event is deemed to fail sooner or later, especially in slow env. Our CI has spotted another of such race: # TEST: ipv6: cleanup of cached exceptions - nexthop objects [FAIL] # can't delete veth device in a timely manner, PMTU dst likely leaked Replace the crude sleep with a loop looking for the expected condition at low interval for a much longer range. Fixes: b3cc4f8a8a41 ("selftests: pmtu: add explicit tests for PMTU exceptions cleanup") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Link: https://lore.kernel.org/r/fd5c745e9bb665b724473af6a9373a8c2a62b247.1706812005.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/pmtu.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 1b6e484e586dc..00ab4c6e40446 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1928,6 +1928,13 @@ check_command() { return 0 } +check_running() { + pid=${1} + cmd=${2} + + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] +} + test_cleanup_vxlanX_exception() { outer="${1}" encap="vxlan" @@ -1958,11 +1965,12 @@ test_cleanup_vxlanX_exception() { ${ns_a} ip link del dev veth_A-R1 & iplink_pid=$! - sleep 1 - if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then - err " can't delete veth device in a timely manner, PMTU dst likely leaked" - return 1 - fi + for i in $(seq 1 20); do + check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0 + sleep 0.1 + done + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 } test_cleanup_ipv6_exception() { -- GitLab From e37cde7a5716466ff2a76f7f27f0a29b05b9a732 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 1 Feb 2024 09:38:15 +0100 Subject: [PATCH 0019/2290] tunnels: fix out of bounds access when building IPv6 PMTU error [ Upstream commit d75abeec401f8c86b470e7028a13fcdc87e5dd06 ] If the ICMPv6 error is built from a non-linear skb we get the following splat, BUG: KASAN: slab-out-of-bounds in do_csum+0x220/0x240 Read of size 4 at addr ffff88811d402c80 by task netperf/820 CPU: 0 PID: 820 Comm: netperf Not tainted 6.8.0-rc1+ #543 ... kasan_report+0xd8/0x110 do_csum+0x220/0x240 csum_partial+0xc/0x20 skb_tunnel_check_pmtu+0xeb9/0x3280 vxlan_xmit_one+0x14c2/0x4080 vxlan_xmit+0xf61/0x5c00 dev_hard_start_xmit+0xfb/0x510 __dev_queue_xmit+0x7cd/0x32a0 br_dev_queue_push_xmit+0x39d/0x6a0 Use skb_checksum instead of csum_partial who cannot deal with non-linear SKBs. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Antoine Tenart Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 586b1b3e35b80..80ccd6661aa32 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) }; skb_reset_network_header(skb); - csum = csum_partial(icmp6h, len, 0); + csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); -- GitLab From cbf2e1660259bae3d5685c3068775c3385b00963 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:41:05 +0800 Subject: [PATCH 0020/2290] atm: idt77252: fix a memleak in open_card_ubr0 [ Upstream commit f3616173bf9be9bf39d131b120d6eea4e6324cb5 ] When alloc_scq fails, card->vcs[0] (i.e. vc) should be freed. Otherwise, in the following call chain: idt77252_init_one |-> idt77252_dev_open |-> open_card_ubr0 |-> alloc_scq [failed] |-> deinit_card |-> vfree(card->vcs); card->vcs is freed and card->vcs[0] is leaked. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhipeng Lu Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/idt77252.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 49cb4537344aa..2daf50d4cd47a 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card) vc->scq = alloc_scq(card, vc->class); if (!vc->scq) { printk("%s: can't get SCQ.\n", card->name); + kfree(card->vcs[0]); + card->vcs[0] = NULL; return -ENOMEM; } -- GitLab From 40657466863415572a5d3c3e119f651c62fce1f7 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:47:13 +0800 Subject: [PATCH 0021/2290] octeontx2-pf: Fix a memleak otx2_sq_init [ Upstream commit b09b58e31b0f43d76f79b9943da3fb7c2843dcbb ] When qmem_alloc and pfvf->hw_ops->sq_aq_init fails, sq->sg should be freed to prevent memleak. Fixes: c9c12d339d93 ("octeontx2-pf: Add support for PTP clock") Signed-off-by: Zhipeng Lu Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 0f896f606c3e6..c00d6d67db518 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -930,8 +930,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) if (pfvf->ptp && qidx < pfvf->hw.tx_queues) { err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt, sizeof(*sq->timestamps)); - if (err) + if (err) { + kfree(sq->sg); + sq->sg = NULL; return err; + } } sq->head = 0; @@ -947,7 +950,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->stats.bytes = 0; sq->stats.pkts = 0; - return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + if (err) { + kfree(sq->sg); + sq->sg = NULL; + return err; + } + + return 0; } -- GitLab From a3156be201cf672449ddb60bfe52603bee47f70b Mon Sep 17 00:00:00 2001 From: Loic Prylli Date: Fri, 3 Nov 2023 11:30:55 +0100 Subject: [PATCH 0022/2290] hwmon: (aspeed-pwm-tacho) mutex for tach reading [ Upstream commit 1168491e7f53581ba7b6014a39a49cfbbb722feb ] the ASPEED_PTCR_RESULT Register can only hold the result for a single fan input. Adding a mutex to protect the register until the reading is done. Signed-off-by: Loic Prylli Signed-off-by: Alexander Hansen Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Link: https://lore.kernel.org/r/121d888762a1232ef403cf35230ccf7b3887083a.1699007401.git.alexander.hansen@9elements.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/aspeed-pwm-tacho.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index d11f674e3dc37..51f321bcd778a 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -194,6 +194,8 @@ struct aspeed_pwm_tacho_data { u8 fan_tach_ch_source[16]; struct aspeed_cooling_device *cdev[8]; const struct attribute_group *groups[3]; + /* protects access to shared ASPEED_PTCR_RESULT */ + struct mutex tach_lock; }; enum type { TYPEM, TYPEN, TYPEO }; @@ -528,6 +530,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, u8 fan_tach_ch_source, type, mode, both; int ret; + mutex_lock(&priv->tach_lock); + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0); regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch); @@ -545,6 +549,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, ASPEED_RPM_STATUS_SLEEP_USEC, usec); + mutex_unlock(&priv->tach_lock); + /* return -ETIMEDOUT if we didn't get an answer. */ if (ret) return ret; @@ -904,6 +910,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + mutex_init(&priv->tach_lock); priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs, &aspeed_pwm_tacho_regmap_config); if (IS_ERR(priv->regmap)) -- GitLab From 9bce69419271eb8b2b3ab467387cb59c99d80deb Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:34 +0800 Subject: [PATCH 0023/2290] hwmon: (coretemp) Fix out-of-bounds memory access [ Upstream commit 4e440abc894585a34c2904a32cd54af1742311b3 ] Fix a bug that pdata->cpu_map[] is set before out-of-bounds check. The problem might be triggered on systems with more than 128 cores per package. Fixes: 7108b80a542b ("hwmon/coretemp: Handle large core ID value") Signed-off-by: Zhang Rui Cc: Link: https://lore.kernel.org/r/20240202092144.71180-2-rui.zhang@intel.com Signed-off-by: Guenter Roeck Stable-dep-of: fdaf0c8629d4 ("hwmon: (coretemp) Fix bogus core_id to attr name mapping") Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 09aab5859fa75..a80ebf433f7c6 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -467,18 +467,14 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, if (pkg_flag) { attr_no = PKG_SYSFS_ATTR_NO; } else { - index = ida_alloc(&pdata->ida, GFP_KERNEL); + index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; + pdata->cpu_map[index] = topology_core_id(cpu); attr_no = index + BASE_SYSFS_ATTR_NO; } - if (attr_no > MAX_CORE_DATA - 1) { - err = -ERANGE; - goto ida_free; - } - tdata = init_temp_data(cpu, pkg_flag); if (!tdata) { err = -ENOMEM; -- GitLab From 3fa78ee0e381d83d9413f05cc229c5ba399116b5 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:35 +0800 Subject: [PATCH 0024/2290] hwmon: (coretemp) Fix bogus core_id to attr name mapping [ Upstream commit fdaf0c8629d4524a168cb9e4ad4231875749b28c ] Before commit 7108b80a542b ("hwmon/coretemp: Handle large core ID value"), there is a fixed mapping between 1. cpu_core_id 2. the index in pdata->core_data[] array 3. the sysfs attr name, aka "tempX_" The later two always equal cpu_core_id + 2. After the commit, pdata->core_data[] index is got from ida so that it can handle sparse core ids and support more cores within a package. However, the commit erroneously maps the sysfs attr name to pdata->core_data[] index instead of cpu_core_id + 2. As a result, the code is not aligned with the comments, and brings user visible changes in hwmon sysfs on systems with sparse core id. For example, before commit 7108b80a542b ("hwmon/coretemp: Handle large core ID value"), /sys/class/hwmon/hwmon2/temp2_label:Core 0 /sys/class/hwmon/hwmon2/temp3_label:Core 1 /sys/class/hwmon/hwmon2/temp4_label:Core 2 /sys/class/hwmon/hwmon2/temp5_label:Core 3 /sys/class/hwmon/hwmon2/temp6_label:Core 4 /sys/class/hwmon/hwmon3/temp10_label:Core 8 /sys/class/hwmon/hwmon3/temp11_label:Core 9 after commit, /sys/class/hwmon/hwmon2/temp2_label:Core 0 /sys/class/hwmon/hwmon2/temp3_label:Core 1 /sys/class/hwmon/hwmon2/temp4_label:Core 2 /sys/class/hwmon/hwmon2/temp5_label:Core 3 /sys/class/hwmon/hwmon2/temp6_label:Core 4 /sys/class/hwmon/hwmon2/temp7_label:Core 8 /sys/class/hwmon/hwmon2/temp8_label:Core 9 Restore the previous behavior and rework the code, comments and variable names to avoid future confusions. Fixes: 7108b80a542b ("hwmon/coretemp: Handle large core ID value") Signed-off-by: Zhang Rui Link: https://lore.kernel.org/r/20240202092144.71180-3-rui.zhang@intel.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index a80ebf433f7c6..59344ad62822d 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -380,7 +380,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) } static int create_core_attrs(struct temp_data *tdata, struct device *dev, - int attr_no) + int index) { int i; static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, @@ -392,13 +392,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, }; for (i = 0; i < tdata->attr_size; i++) { + /* + * We map the attr number to core id of the CPU + * The attr number is always core id + 2 + * The Pkgtemp will always show up as temp1_*, if available + */ + int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2; + snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, "temp%d_%s", attr_no, suffixes[i]); sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; tdata->sd_attrs[i].dev_attr.attr.mode = 0444; tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; - tdata->sd_attrs[i].index = attr_no; + tdata->sd_attrs[i].index = index; tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr; } tdata->attr_group.attrs = tdata->attrs; @@ -456,23 +463,22 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; - int err, index, attr_no; + int err, index; /* - * Find attr number for sysfs: - * We map the attr number to core id of the CPU - * The attr number is always core id + 2 - * The Pkgtemp will always show up as temp1_*, if available + * Get the index of tdata in pdata->core_data[] + * tdata for package: pdata->core_data[1] + * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1] */ if (pkg_flag) { - attr_no = PKG_SYSFS_ATTR_NO; + index = PKG_SYSFS_ATTR_NO; } else { index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; pdata->cpu_map[index] = topology_core_id(cpu); - attr_no = index + BASE_SYSFS_ATTR_NO; + index += BASE_SYSFS_ATTR_NO; } tdata = init_temp_data(cpu, pkg_flag); @@ -504,20 +510,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, } } - pdata->core_data[attr_no] = tdata; + pdata->core_data[index] = tdata; /* Create sysfs interfaces */ - err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no); + err = create_core_attrs(tdata, pdata->hwmon_dev, index); if (err) goto exit_free; return 0; exit_free: - pdata->core_data[attr_no] = NULL; + pdata->core_data[index] = NULL; kfree(tdata); ida_free: if (!pkg_flag) - ida_free(&pdata->ida, index); + ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO); return err; } -- GitLab From 54538752216bf89ee88d47ad07802063a498c299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2024 09:54:04 +0000 Subject: [PATCH 0025/2290] inet: read sk->sk_family once in inet_recv_error() [ Upstream commit eef00a82c568944f113f2de738156ac591bbd5cd ] inet_recv_error() is called without holding the socket lock. IPv6 socket could mutate to IPv4 with IPV6_ADDRFORM socket option and trigger a KCSAN warning. Fixes: f4713a3dfad0 ("net-timestamp: make tcp_recvmsg call ipv6_recv_error for AF_INET6 socks") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/af_inet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2f646335d2183..9408dc3bb42d3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1637,10 +1637,12 @@ EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { - if (sk->sk_family == AF_INET) + unsigned int family = READ_ONCE(sk->sk_family); + + if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) + if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; -- GitLab From 05a4d0e16615b315d278ff4868c98b2d479e65b7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:41:47 +0300 Subject: [PATCH 0026/2290] drm/i915/gvt: Fix uninitialized variable in handle_mmio() [ Upstream commit 47caa96478b99d6d1199b89467cc3e5a6cc754ee ] This code prints the wrong variable in the warning message. It should print "i" instead of "info->offset". On the first iteration "info" is uninitialized leading to a crash and on subsequent iterations it prints the previous offset instead of the current one. Fixes: e0f74ed4634d ("i915/gvt: Separate the MMIO tracking table from GVT-g") Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/11957c20-b178-4027-9b0a-e32e9591dd7c@moroto.mountain Reviewed-by: Zhenyu Wang Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/handlers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index daac2050d77d0..6f531bb61f7e5 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2844,8 +2844,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset, for (i = start; i < end; i += 4) { p = intel_gvt_find_mmio_info(gvt, i); if (p) { - WARN(1, "dup mmio definition offset %x\n", - info->offset); + WARN(1, "dup mmio definition offset %x\n", i); /* We return -EEXIST here to make GVT-g load fail. * So duplicated MMIO can be found as soon as -- GitLab From cf6b97e1830eed623ae9484d4ff2bb252a99f1b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:15 +0000 Subject: [PATCH 0027/2290] rxrpc: Fix response to PING RESPONSE ACKs to a dead call [ Upstream commit 6f769f22822aa4124b556339781b04d810f0e038 ] Stop rxrpc from sending a DUP ACK in response to a PING RESPONSE ACK on a dead call. We may have initiated the ping but the call may have beaten the response to completion. Fixes: 18bfeba50dfd ("rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/conn_event.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aab0697013982..5d91ef562ff78 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -41,6 +41,14 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); + if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &pkt.ack, sizeof(pkt.ack)) < 0) + return; + if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) + return; + } + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just -- GitLab From 3d3a5b31b43515b5752ff282702ca546ec3e48b6 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 1 Feb 2024 00:23:09 +0900 Subject: [PATCH 0028/2290] tipc: Check the bearer type before calling tipc_udp_nl_bearer_add() [ Upstream commit 3871aa01e1a779d866fa9dfdd5a836f342f4eb87 ] syzbot reported the following general protection fault [1]: general protection fault, probably for non-canonical address 0xdffffc0000000010: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000080-0x0000000000000087] ... RIP: 0010:tipc_udp_is_known_peer+0x9c/0x250 net/tipc/udp_media.c:291 ... Call Trace: tipc_udp_nl_bearer_add+0x212/0x2f0 net/tipc/udp_media.c:646 tipc_nl_bearer_add+0x21e/0x360 net/tipc/bearer.c:1089 genl_family_rcv_msg_doit+0x1fc/0x2e0 net/netlink/genetlink.c:972 genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] genl_rcv_msg+0x561/0x800 net/netlink/genetlink.c:1067 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2544 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x53b/0x810 net/netlink/af_netlink.c:1367 netlink_sendmsg+0x8b7/0xd70 net/netlink/af_netlink.c:1909 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b The cause of this issue is that when tipc_nl_bearer_add() is called with the TIPC_NLA_BEARER_UDP_OPTS attribute, tipc_udp_nl_bearer_add() is called even if the bearer is not UDP. tipc_udp_is_known_peer() called by tipc_udp_nl_bearer_add() assumes that the media_ptr field of the tipc_bearer has an udp_bearer type object, so the function goes crazy for non-UDP bearers. This patch fixes the issue by checking the bearer type before calling tipc_udp_nl_bearer_add() in tipc_nl_bearer_add(). Fixes: ef20cd4dd163 ("tipc: introduce UDP replicast") Reported-and-tested-by: syzbot+5142b87a9abc510e14fa@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5142b87a9abc510e14fa [1] Signed-off-by: Shigeru Yoshida Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/20240131152310.4089541-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tipc/bearer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cdcd2731860ba..1cb9935620886 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1088,6 +1088,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + return -EINVAL; + } + err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); if (err) { -- GitLab From e0e09186d8821ad59806115d347ea32efa43ca4b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 3 Feb 2024 10:31:49 -0800 Subject: [PATCH 0029/2290] af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC. [ Upstream commit 1279f9d9dec2d7462823a18c29ad61359e0a007d ] syzbot reported a warning [0] in __unix_gc() with a repro, which creates a socketpair and sends one socket's fd to itself using the peer. socketpair(AF_UNIX, SOCK_STREAM, 0, [3, 4]) = 0 sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\360", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], msg_controllen=24, msg_flags=0}, MSG_OOB|MSG_PROBE|MSG_DONTWAIT|MSG_ZEROCOPY) = 1 This forms a self-cyclic reference that GC should finally untangle but does not due to lack of MSG_OOB handling, resulting in memory leak. Recently, commit 11498715f266 ("af_unix: Remove io_uring code for GC.") removed io_uring's dead code in GC and revealed the problem. The code was executed at the final stage of GC and unconditionally moved all GC candidates from gc_candidates to gc_inflight_list. That papered over the reported problem by always making the following WARN_ON_ONCE(!list_empty(&gc_candidates)) false. The problem has been there since commit 2aab4b969002 ("af_unix: fix struct pid leaks in OOB support") added full scm support for MSG_OOB while fixing another bug. To fix this problem, we must call kfree_skb() for unix_sk(sk)->oob_skb if the socket still exists in gc_candidates after purging collected skb. Then, we need to set NULL to oob_skb before calling kfree_skb() because it calls last fput() and triggers unix_release_sock(), where we call duplicate kfree_skb(u->oob_skb) if not NULL. Note that the leaked socket remained being linked to a global list, so kmemleak also could not detect it. We need to check /proc/net/protocol to notice the unfreed socket. [0]: WARNING: CPU: 0 PID: 2863 at net/unix/garbage.c:345 __unix_gc+0xc74/0xe80 net/unix/garbage.c:345 Modules linked in: CPU: 0 PID: 2863 Comm: kworker/u4:11 Not tainted 6.8.0-rc1-syzkaller-00583-g1701940b1a02 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: events_unbound __unix_gc RIP: 0010:__unix_gc+0xc74/0xe80 net/unix/garbage.c:345 Code: 8b 5c 24 50 e9 86 f8 ff ff e8 f8 e4 22 f8 31 d2 48 c7 c6 30 6a 69 89 4c 89 ef e8 97 ef ff ff e9 80 f9 ff ff e8 dd e4 22 f8 90 <0f> 0b 90 e9 7b fd ff ff 48 89 df e8 5c e7 7c f8 e9 d3 f8 ff ff e8 RSP: 0018:ffffc9000b03fba0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffffc9000b03fc10 RCX: ffffffff816c493e RDX: ffff88802c02d940 RSI: ffffffff896982f3 RDI: ffffc9000b03fb30 RBP: ffffc9000b03fce0 R08: 0000000000000001 R09: fffff52001607f66 R10: 0000000000000003 R11: 0000000000000002 R12: dffffc0000000000 R13: ffffc9000b03fc10 R14: ffffc9000b03fc10 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005559c8677a60 CR3: 000000000d57a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: process_one_work+0x889/0x15e0 kernel/workqueue.c:2633 process_scheduled_works kernel/workqueue.c:2706 [inline] worker_thread+0x8b9/0x12a0 kernel/workqueue.c:2787 kthread+0x2c6/0x3b0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Reported-by: syzbot+fa3ef895554bdbfd1183@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa3ef895554bdbfd1183 Fixes: 2aab4b969002 ("af_unix: fix struct pid leaks in OOB support") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240203183149.63573-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/garbage.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc27635403932..767b338a7a2d4 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -314,6 +314,17 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + list_for_each_entry_safe(u, next, &gc_candidates, link) { + struct sk_buff *skb = u->oob_skb; + + if (skb) { + u->oob_skb = NULL; + kfree_skb(skb); + } + } +#endif + spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to -- GitLab From 4e2c4846b2507f6dfc9bea72b7567c2693a82a16 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Feb 2024 17:10:04 +0000 Subject: [PATCH 0030/2290] ppp_async: limit MRU to 64K [ Upstream commit cb88cb53badb8aeb3955ad6ce80b07b598e310b8 ] syzbot triggered a warning [1] in __alloc_pages(): WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp) Willem fixed a similar issue in commit c0a2a1b0d631 ("ppp: limit MRU to 64K") Adopt the same sanity check for ppp_async_ioctl(PPPIOCSMRU) [1]: WARNING: CPU: 1 PID: 11 at mm/page_alloc.c:4543 __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 Modules linked in: CPU: 1 PID: 11 Comm: kworker/u4:0 Not tainted 6.8.0-rc2-syzkaller-g41bccc98fb79 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Workqueue: events_unbound flush_to_ldisc pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 lr : __alloc_pages+0xc8/0x698 mm/page_alloc.c:4537 sp : ffff800093967580 x29: ffff800093967660 x28: ffff8000939675a0 x27: dfff800000000000 x26: ffff70001272ceb4 x25: 0000000000000000 x24: ffff8000939675c0 x23: 0000000000000000 x22: 0000000000060820 x21: 1ffff0001272ceb8 x20: ffff8000939675e0 x19: 0000000000000010 x18: ffff800093967120 x17: ffff800083bded5c x16: ffff80008ac97500 x15: 0000000000000005 x14: 1ffff0001272cebc x13: 0000000000000000 x12: 0000000000000000 x11: ffff70001272cec1 x10: 1ffff0001272cec0 x9 : 0000000000000001 x8 : ffff800091c91000 x7 : 0000000000000000 x6 : 000000000000003f x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000020 x2 : 0000000000000008 x1 : 0000000000000000 x0 : ffff8000939675e0 Call trace: __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] __kmalloc_large_node+0xbc/0x1fc mm/slub.c:3926 __do_kmalloc_node mm/slub.c:3969 [inline] __kmalloc_node_track_caller+0x418/0x620 mm/slub.c:4001 kmalloc_reserve+0x17c/0x23c net/core/skbuff.c:590 __alloc_skb+0x1c8/0x3d8 net/core/skbuff.c:651 __netdev_alloc_skb+0xb8/0x3e8 net/core/skbuff.c:715 netdev_alloc_skb include/linux/skbuff.h:3235 [inline] dev_alloc_skb include/linux/skbuff.h:3248 [inline] ppp_async_input drivers/net/ppp/ppp_async.c:863 [inline] ppp_asynctty_receive+0x588/0x186c drivers/net/ppp/ppp_async.c:341 tty_ldisc_receive_buf+0x12c/0x15c drivers/tty/tty_buffer.c:390 tty_port_default_receive_buf+0x74/0xac drivers/tty/tty_port.c:37 receive_buf drivers/tty/tty_buffer.c:444 [inline] flush_to_ldisc+0x284/0x6e4 drivers/tty/tty_buffer.c:494 process_one_work+0x694/0x1204 kernel/workqueue.c:2633 process_scheduled_works kernel/workqueue.c:2706 [inline] worker_thread+0x938/0xef4 kernel/workqueue.c:2787 kthread+0x288/0x310 kernel/kthread.c:388 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+c5da1f087c9e4ec6c933@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240205171004.1059724-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ppp/ppp_async.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 15a179631903f..abc65c4d7a303 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -469,6 +469,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, p)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; -- GitLab From 69d66d493b1f303d3fb44866485744e156f7bf26 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 4 Feb 2024 08:56:18 -0800 Subject: [PATCH 0031/2290] selftests: cmsg_ipv6: repeat the exact packet [ Upstream commit 4b00d0c513da58b68df015968721b11396fe4ab3 ] cmsg_ipv6 test requests tcpdump to capture 4 packets, and sends until tcpdump quits. Only the first packet is "real", however, and the rest are basic UDP packets. So if tcpdump doesn't start in time it will miss the real packet and only capture the UDP ones. This makes the test fail on slow machine (no KVM or with debug enabled) 100% of the time, while it passes in fast environments. Repeat the "real" / expected packet. Fixes: 9657ad09e1fa ("selftests: net: test IPV6_TCLASS") Fixes: 05ae83d5a4a2 ("selftests: net: test IPV6_HOPLIMIT") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/cmsg_ipv6.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 330d0b1ceced3..c921750ca118d 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -91,7 +91,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "TCLASS $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null @@ -128,7 +128,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "HOPLIMIT $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null -- GitLab From e79ef7966ee95072b961ec86b59f66b28b131372 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 1 Feb 2024 22:58:36 +0100 Subject: [PATCH 0032/2290] netfilter: nft_compat: narrow down revision to unsigned 8-bits [ Upstream commit 36fa8d697132b4bed2312d700310e8a78b000c84 ] xt_find_revision() expects u8, restrict it to this datatype. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_compat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6952da7dfc02c..d583ba50f1818 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, }; @@ -418,7 +418,7 @@ static void nft_match_eval(const struct nft_expr *expr, static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, }; @@ -721,7 +721,7 @@ out_put: static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, .len = NFT_COMPAT_NAME_MAX-1 }, - [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, }; -- GitLab From 8762bcc92719c73284f869986dce91972bf422ec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 1 Feb 2024 23:33:29 +0100 Subject: [PATCH 0033/2290] netfilter: nft_compat: reject unused compat flag [ Upstream commit 292781c3c5485ce33bd22b2ef1b2bed709b4d672 ] Flag (1 << 0) is ignored is set, never used, reject it it with EINVAL instead. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_compat.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index af8f4c304d272..707af820f1a97 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -266,9 +266,11 @@ enum nft_rule_attributes { /** * enum nft_rule_compat_flags - nf_tables rule compat flags * + * @NFT_RULE_COMPAT_F_UNUSED: unused * @NFT_RULE_COMPAT_F_INV: invert the check result */ enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_UNUSED = (1 << 0), NFT_RULE_COMPAT_F_INV = (1 << 1), NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, }; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index d583ba50f1818..05862b3ea33bf 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -212,7 +212,8 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); - if (flags & ~NFT_RULE_COMPAT_F_MASK) + if (flags & NFT_RULE_COMPAT_F_UNUSED || + flags & ~NFT_RULE_COMPAT_F_MASK) return -EINVAL; if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; -- GitLab From a060da32357a423391c6d664013223e331be9857 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Feb 2024 00:05:23 +0100 Subject: [PATCH 0034/2290] netfilter: nft_compat: restrict match/target protocol to u16 [ Upstream commit d694b754894c93fb4d71a7f3699439dec111decc ] xt_check_{match,target} expects u16, but NFTA_RULE_COMPAT_PROTO is u32. NLA_POLICY_MAX(NLA_BE32, 65535) cannot be used because .max in nla_policy is s16, see 3e48be05f3c7 ("netlink: add attribute range validation to policy"). Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_compat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 05862b3ea33bf..e1623fbf36548 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 l4proto; u32 flags; int err; @@ -218,7 +219,12 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + if (l4proto > U16_MAX) + return -EINVAL; + + *proto = l4proto; + return 0; } -- GitLab From efdd665ce1a1634b8c1dad5e7f6baaef3e131d0a Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 7 Feb 2024 10:20:57 +0530 Subject: [PATCH 0035/2290] drm/amd/display: Implement bounds check for stream encoder creation in DCN301 [ Upstream commit 58fca355ad37dcb5f785d9095db5f748b79c5dc2 ] 'stream_enc_regs' array is an array of dcn10_stream_enc_registers structures. The array is initialized with four elements, corresponding to the four calls to stream_enc_regs() in the array initializer. This means that valid indices for this array are 0, 1, 2, and 3. The error message 'stream_enc_regs' 4 <= 5 below, is indicating that there is an attempt to access this array with an index of 5, which is out of bounds. This could lead to undefined behavior Here, eng_id is used as an index to access the stream_enc_regs array. If eng_id is 5, this would result in an out-of-bounds access on the stream_enc_regs array. Thus fixing Buffer overflow error in dcn301_stream_encoder_create reported by Smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn301/dcn301_resource.c:1011 dcn301_stream_encoder_create() error: buffer overflow 'stream_enc_regs' 4 <= 5 Fixes: 3a83e4e64bb1 ("drm/amd/display: Add dcn3.01 support to DC (v2)") Cc: Roman Li Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index f04595b750abc..5ec3f50a72acd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1014,7 +1014,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id vpg = dcn301_vpg_create(ctx, vpg_inst); afmt = dcn301_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); -- GitLab From 181dade2513017bee3804b2114b0726ba2d3ed0e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Feb 2024 14:59:24 +0100 Subject: [PATCH 0036/2290] netfilter: nft_ct: reject direction for ct id [ Upstream commit 38ed1c7062ada30d7c11e7a7acc749bf27aa14aa ] Direction attribute is ignored, reject it in case this ever needs to be supported Fixes: 3087c3f7c23b ("netfilter: nft_ct: Add ct id support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_ct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 1101665f52537..8df7564f0611e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -484,6 +484,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; #endif case NFT_CT_ID: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); break; default: -- GitLab From 3eaab7d565be167fd041025dea391752771414ac Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2024 21:52:46 +0100 Subject: [PATCH 0037/2290] netfilter: nft_set_pipapo: store index in scratch maps [ Upstream commit 76313d1a4aa9e30d5b43dee5efd8bcd4d8250006 ] Pipapo needs a scratchpad area to keep state during matching. This state can be large and thus cannot reside on stack. Each set preallocates percpu areas for this. On each match stage, one scratchpad half starts with all-zero and the other is inited to all-ones. At the end of each stage, the half that starts with all-ones is always zero. Before next field is tested, pointers to the two halves are swapped, i.e. resmap pointer turns into fill pointer and vice versa. After the last field has been processed, pipapo stashes the index toggle in a percpu variable, with assumption that next packet will start with the all-zero half and sets all bits in the other to 1. This isn't reliable. There can be multiple sets and we can't be sure that the upper and lower half of all set scratch map is always in sync (lookups can be conditional), so one set might have swapped, but other might not have been queried. Thus we need to keep the index per-set-and-cpu, just like the scratchpad. Note that this bug fix is incomplete, there is a related issue. avx2 and normal implementation might use slightly different areas of the map array space due to the avx2 alignment requirements, so m->scratch (generic/fallback implementation) and ->scratch_aligned (avx) may partially overlap. scratch and scratch_aligned are not distinct objects, the latter is just the aligned address of the former. After this change, write to scratch_align->map_index may write to scratch->map, so this issue becomes more prominent, we can set to 1 a bit in the supposedly-all-zero area of scratch->map[]. A followup patch will remove the scratch_aligned and makes generic and avx code use the same (aligned) area. Its done in a separate change to ease review. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 41 ++++++++++++++++++----------- net/netfilter/nft_set_pipapo.h | 14 ++++++++-- net/netfilter/nft_set_pipapo_avx2.c | 15 +++++------ 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4e1cc31729b80..fbd0dbf9b9655 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -342,9 +342,6 @@ #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; @@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, local_bh_disable(); - map_index = raw_cpu_read(nft_pipapo_scratch_index); - m = rcu_dereference(priv->match); if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) goto out; - res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); - fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); + scratch = *raw_cpu_ptr(m->scratch); + + map_index = scratch->map_index; + + res_map = scratch->map + (map_index ? m->bsize_max : 0); + fill_map = scratch->map + (map_index ? 0 : m->bsize_max); memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); @@ -460,7 +460,7 @@ next_match: b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, last); if (b < 0) { - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return false; @@ -477,7 +477,7 @@ next_match: * current inactive bitmap is clean and can be reused as * *next* bitmap (not initial) for the next packet. */ - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return true; @@ -1114,12 +1114,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, int i; for_each_possible_cpu(i) { - unsigned long *scratch; + struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN - unsigned long *scratch_aligned; + void *scratch_aligned; #endif - - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { @@ -1138,7 +1138,16 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, *per_cpu_ptr(clone->scratch, i) = scratch; #ifdef NFT_PIPAPO_ALIGN - scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() + * above guarantee we can waste up to those bytes in order + * to align the map field regardless of its offset within + * the struct. + */ + BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; #endif } @@ -2132,7 +2141,7 @@ static int nft_pipapo_init(const struct nft_set *set, m->field_count = field_count; m->bsize_max = 0; - m->scratch = alloc_percpu(unsigned long *); + m->scratch = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch) { err = -ENOMEM; goto out_scratch; @@ -2141,7 +2150,7 @@ static int nft_pipapo_init(const struct nft_set *set, *per_cpu_ptr(m->scratch, i) = NULL; #ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(unsigned long *); + m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch_aligned) { err = -ENOMEM; goto out_free; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583eb..de96e1a01dc0b 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -130,6 +130,16 @@ struct nft_pipapo_field { union nft_pipapo_map_bucket *mt; }; +/** + * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @map_index: Current working bitmap index, toggled between field matches + * @map: store partial matching results during lookup + */ +struct nft_pipapo_scratch { + u8 map_index; + unsigned long map[]; +}; + /** * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set @@ -142,9 +152,9 @@ struct nft_pipapo_field { struct nft_pipapo_match { int field_count; #ifdef NFT_PIPAPO_ALIGN - unsigned long * __percpu *scratch_aligned; + struct nft_pipapo_scratch * __percpu *scratch_aligned; #endif - unsigned long * __percpu *scratch; + struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[]; diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 52e0d026d30ad..78213c73af2e2 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -71,9 +71,6 @@ #define NFT_PIPAPO_AVX2_ZERO(reg) \ asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); - /** * nft_pipapo_avx2_prepare() - Prepare before main algorithm body * @@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); - unsigned long *res, *fill, *scratch; + struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; struct nft_pipapo_match *m; struct nft_pipapo_field *f; + unsigned long *res, *fill; bool map_index; int i, ret = 0; @@ -1146,10 +1144,11 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, kernel_fpu_end(); return false; } - map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); - res = scratch + (map_index ? m->bsize_max : 0); - fill = scratch + (map_index ? 0 : m->bsize_max); + map_index = scratch->map_index; + + res = scratch->map + (map_index ? m->bsize_max : 0); + fill = scratch->map + (map_index ? 0 : m->bsize_max); /* Starting map doesn't need to be set for this implementation */ @@ -1221,7 +1220,7 @@ next_match: out: if (i % 2) - raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + scratch->map_index = !map_index; kernel_fpu_end(); return ret >= 0; -- GitLab From fac3478d5b87b8f9c6cd94e449c15fa4bf92a587 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2024 21:52:47 +0100 Subject: [PATCH 0038/2290] netfilter: nft_set_pipapo: add helper to release pcpu scratch area [ Upstream commit 47b1c03c3c1a119435480a1e73f27197dc59131d ] After next patch simple kfree() is not enough anymore, so add a helper for it. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Stable-dep-of: 5a8cdf6fd860 ("netfilter: nft_set_pipapo: remove scratch_aligned pointer") Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index fbd0dbf9b9655..977bf724fb7eb 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1101,6 +1101,24 @@ static void pipapo_map(struct nft_pipapo_match *m, f->mt[map[i].to + j].e = e; } +/** + * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * @m: Matching data + * @cpu: CPU number + */ +static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) +{ + struct nft_pipapo_scratch *s; + void *mem; + + s = *per_cpu_ptr(m->scratch, cpu); + if (!s) + return; + + mem = s; + kfree(mem); +} + /** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions @@ -1133,7 +1151,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return -ENOMEM; } - kfree(*per_cpu_ptr(clone->scratch, i)); + pipapo_free_scratch(clone, i); *per_cpu_ptr(clone->scratch, i) = scratch; @@ -1359,7 +1377,7 @@ out_lt: } out_scratch_realloc: for_each_possible_cpu(i) - kfree(*per_cpu_ptr(new->scratch, i)); + pipapo_free_scratch(new, i); #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif @@ -1647,7 +1665,7 @@ static void pipapo_free_match(struct nft_pipapo_match *m) int i; for_each_possible_cpu(i) - kfree(*per_cpu_ptr(m->scratch, i)); + pipapo_free_scratch(m, i); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2249,7 +2267,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, free_percpu(m->scratch_aligned); #endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(m->scratch, cpu)); + pipapo_free_scratch(m, cpu); free_percpu(m->scratch); pipapo_free_fields(m); kfree(m); @@ -2266,7 +2284,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, free_percpu(priv->clone->scratch_aligned); #endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); + pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); pipapo_free_fields(priv->clone); -- GitLab From a442ff5405d7fa9a9023530d51a93894cc11d13d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 8 Feb 2024 10:31:29 +0100 Subject: [PATCH 0039/2290] netfilter: nft_set_pipapo: remove scratch_aligned pointer [ Upstream commit 5a8cdf6fd860ac5e6d08d72edbcecee049a7fec4 ] use ->scratch for both avx2 and the generic implementation. After previous change the scratch->map member is always aligned properly for AVX2, so we can just use scratch->map in AVX2 too. The alignoff delta is stored in the scratchpad so we can reconstruct the correct address to free the area again. Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 41 +++++------------------------ net/netfilter/nft_set_pipapo.h | 6 ++--- net/netfilter/nft_set_pipapo_avx2.c | 2 +- 3 files changed, 10 insertions(+), 39 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 977bf724fb7eb..e1969209b3abb 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1116,6 +1116,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c return; mem = s; + mem -= s->align_off; kfree(mem); } @@ -1135,6 +1136,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN void *scratch_aligned; + u32 align_off; #endif scratch = kzalloc_node(struct_size(scratch, map, bsize_max * 2) + @@ -1153,8 +1155,6 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, pipapo_free_scratch(clone, i); - *per_cpu_ptr(clone->scratch, i) = scratch; - #ifdef NFT_PIPAPO_ALIGN /* Align &scratch->map (not the struct itself): the extra * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() @@ -1166,8 +1166,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); - *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + align_off = scratch_aligned - (void *)scratch; + + scratch = scratch_aligned; + scratch->align_off = align_off; #endif + *per_cpu_ptr(clone->scratch, i) = scratch; } return 0; @@ -1321,11 +1325,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; -#ifdef NFT_PIPAPO_ALIGN - new->scratch_aligned = alloc_percpu(*new->scratch_aligned); - if (!new->scratch_aligned) - goto out_scratch; -#endif for_each_possible_cpu(i) *per_cpu_ptr(new->scratch, i) = NULL; @@ -1378,9 +1377,6 @@ out_lt: out_scratch_realloc: for_each_possible_cpu(i) pipapo_free_scratch(new, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(new->scratch_aligned); -#endif out_scratch: free_percpu(new->scratch); kfree(new); @@ -1667,11 +1663,7 @@ static void pipapo_free_match(struct nft_pipapo_match *m) for_each_possible_cpu(i) pipapo_free_scratch(m, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); @@ -2167,16 +2159,6 @@ static int nft_pipapo_init(const struct nft_set *set, for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; -#ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *); - if (!m->scratch_aligned) { - err = -ENOMEM; - goto out_free; - } - for_each_possible_cpu(i) - *per_cpu_ptr(m->scratch_aligned, i) = NULL; -#endif - rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { @@ -2207,9 +2189,6 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); out_scratch: kfree(m); @@ -2263,9 +2242,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif for_each_possible_cpu(cpu) pipapo_free_scratch(m, cpu); free_percpu(m->scratch); @@ -2280,9 +2256,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->dirty) nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(priv->clone->scratch_aligned); -#endif for_each_possible_cpu(cpu) pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index de96e1a01dc0b..30a3d092cd841 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -133,10 +133,12 @@ struct nft_pipapo_field { /** * struct nft_pipapo_scratch - percpu data used for lookup and matching * @map_index: Current working bitmap index, toggled between field matches + * @align_off: Offset to get the originally allocated address * @map: store partial matching results during lookup */ struct nft_pipapo_scratch { u8 map_index; + u32 align_off; unsigned long map[]; }; @@ -144,16 +146,12 @@ struct nft_pipapo_scratch { * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set * @scratch: Preallocated per-CPU maps for partial matching results - * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes * @bsize_max: Maximum lookup table bucket size of all fields, in longs * @rcu Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { int field_count; -#ifdef NFT_PIPAPO_ALIGN - struct nft_pipapo_scratch * __percpu *scratch_aligned; -#endif struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 78213c73af2e2..90e275bb3e5d7 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1139,7 +1139,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch_aligned); + scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); return false; -- GitLab From ec1bedd797588fe38fc11cba26d77bb1d9b194c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 Oct 2023 17:04:39 +0300 Subject: [PATCH 0040/2290] fs/ntfs3: Fix an NULL dereference bug [ Upstream commit b2dd7b953c25ffd5912dda17e980e7168bebcf6c ] The issue here is when this is called from ntfs_load_attr_list(). The "size" comes from le32_to_cpu(attr->res.data_size) so it can't overflow on a 64bit systems but on 32bit systems the "+ 1023" can overflow and the result is zero. This means that the kmalloc will succeed by returning the ZERO_SIZE_PTR and then the memcpy() will crash with an Oops on the next line. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Signed-off-by: Dan Carpenter Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 8c9abaf139e67..74482ef569ab7 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -467,7 +467,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, int al_update(struct ntfs_inode *ni, int sync); static inline size_t al_aligned(size_t size) { - return (size + 1023) & ~(size_t)1023; + return size_add(size, 1023) & ~(size_t)1023; } /* Globals from bitfunc.c */ -- GitLab From 1ebd75cefaac6fd74729a7d3157f6eaa59960ae2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 3 Feb 2024 10:45:21 +0800 Subject: [PATCH 0041/2290] scsi: core: Move scsi_host_busy() out of host lock if it is for per-command [ Upstream commit 4e6c9011990726f4d175e2cdfebe5b0b8cce4839 ] Commit 4373534a9850 ("scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler") intended to fix a hard lockup issue triggered by EH. The core idea was to move scsi_host_busy() out of the host lock when processing individual commands for EH. However, a suggested style change inadvertently caused scsi_host_busy() to remain under the host lock. Fix this by calling scsi_host_busy() outside the lock. Fixes: 4373534a9850 ("scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler") Cc: Sathya Prakash Veerichetty Cc: Bart Van Assche Cc: Ewan D. Milne Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240203024521.2006455-1-ming.lei@redhat.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_error.c | 3 ++- drivers/scsi/scsi_lib.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 66290961c47c2..cfa6f0edff17c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -277,11 +277,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) { struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); struct Scsi_Host *shost = scmd->device->host; + unsigned int busy = scsi_host_busy(shost); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost, scsi_host_busy(shost)); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0e7e9f1e5a029..5c5954b78585e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -280,9 +280,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { + unsigned int busy = scsi_host_busy(shost); + spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost, scsi_host_busy(shost)); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); -- GitLab From e5dc63f01e027721c29f82069f7e97e2149fa131 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Nov 2023 12:25:56 -1000 Subject: [PATCH 0042/2290] blk-iocost: Fix an UBSAN shift-out-of-bounds warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2a427b49d02995ea4a6ff93a1432c40fa4d36821 ] When iocg_kick_delay() is called from a CPU different than the one which set the delay, @now may be in the past of @iocg->delay_at leading to the following warning: UBSAN: shift-out-of-bounds in block/blk-iocost.c:1359:23 shift exponent 18446744073709 is too large for 64-bit type 'u64' (aka 'unsigned long long') ... Call Trace: dump_stack_lvl+0x79/0xc0 __ubsan_handle_shift_out_of_bounds+0x2ab/0x300 iocg_kick_delay+0x222/0x230 ioc_rqos_merge+0x1d7/0x2c0 __rq_qos_merge+0x2c/0x80 bio_attempt_back_merge+0x83/0x190 blk_attempt_plug_merge+0x101/0x150 blk_mq_submit_bio+0x2b1/0x720 submit_bio_noacct_nocheck+0x320/0x3e0 __swap_writepage+0x2ab/0x9d0 The underflow itself doesn't really affect the behavior in any meaningful way; however, the past timestamp may exaggerate the delay amount calculated later in the code, which shouldn't be a material problem given the nature of the delay mechanism. If @now is in the past, this CPU is racing another CPU which recently set up the delay and there's nothing this CPU can contribute w.r.t. the delay. Let's bail early from iocg_kick_delay() in such cases. Reported-by: Breno Leitão Signed-off-by: Tejun Heo Fixes: 5160a5a53c0c ("blk-iocost: implement delay adjustment hysteresis") Link: https://lore.kernel.org/r/ZVvc9L_CYk5LO1fT@slm.duckdns.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-iocost.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 7dd6a33e1d6a8..e6557024e3da8 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1337,6 +1337,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) lockdep_assert_held(&iocg->waitq.lock); + /* + * If the delay is set by another CPU, we may be in the past. No need to + * change anything if so. This avoids decay calculation underflow. + */ + if (time_before64(now->now, iocg->delay_at)) + return false; + /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; if (iocg->delay) -- GitLab From b478e414cf85cf7e2fc7190dde582af726e340e1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 17 Nov 2022 17:11:52 -0500 Subject: [PATCH 0043/2290] fs: dlm: don't put dlm_local_addrs on heap [ Upstream commit c51c9cd8addcfbdc097dbefd59f022402183644b ] This patch removes to allocate the dlm_local_addr[] pointers on the heap. Instead we directly store the type of "struct sockaddr_storage". This removes function deinit_local() because it was freeing memory only. Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/lowcomms.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 72f34f96d0155..2c797eb519da9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -174,7 +174,7 @@ static LIST_HEAD(dlm_node_addrs); static DEFINE_SPINLOCK(dlm_node_addrs_spin); static struct listen_connection listen_con; -static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; +static struct sockaddr_storage dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; int dlm_allow_conn; @@ -398,7 +398,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, if (!sa_out) return 0; - if (dlm_local_addr[0]->ss_family == AF_INET) { + if (dlm_local_addr[0].ss_family == AF_INET) { struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; @@ -727,7 +727,7 @@ static void add_sock(struct socket *sock, struct connection *con) static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, int *addr_len) { - saddr->ss_family = dlm_local_addr[0]->ss_family; + saddr->ss_family = dlm_local_addr[0].ss_family; if (saddr->ss_family == AF_INET) { struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; in4_addr->sin_port = cpu_to_be16(port); @@ -1167,7 +1167,7 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port) int i, addr_len, result = 0; for (i = 0; i < dlm_local_count; i++) { - memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); + memcpy(&localaddr, &dlm_local_addr[i], sizeof(localaddr)); make_sockaddr(&localaddr, port, &addr_len); if (!i) @@ -1187,7 +1187,7 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port) /* Get local addresses */ static void init_local(void) { - struct sockaddr_storage sas, *addr; + struct sockaddr_storage sas; int i; dlm_local_count = 0; @@ -1195,21 +1195,10 @@ static void init_local(void) if (dlm_our_addr(&sas, i)) break; - addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS); - if (!addr) - break; - dlm_local_addr[dlm_local_count++] = addr; + memcpy(&dlm_local_addr[dlm_local_count++], &sas, sizeof(sas)); } } -static void deinit_local(void) -{ - int i; - - for (i = 0; i < dlm_local_count; i++) - kfree(dlm_local_addr[i]); -} - static struct writequeue_entry *new_writequeue_entry(struct connection *con) { struct writequeue_entry *entry; @@ -1575,7 +1564,7 @@ static void dlm_connect(struct connection *con) } /* Create a socket to communicate with */ - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) goto socket_err; @@ -1786,7 +1775,6 @@ void dlm_lowcomms_stop(void) foreach_conn(free_conn); srcu_read_unlock(&connections_srcu, idx); work_stop(); - deinit_local(); dlm_proto_ops = NULL; } @@ -1803,7 +1791,7 @@ static int dlm_listen_for_all(void) if (result < 0) return result; - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) { log_print("Can't create comms socket: %d", result); @@ -1842,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock) /* Bind to our cluster-known address connecting to avoid * routing problems. */ - memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); + memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, &addr_len); result = kernel_bind(sock, (struct sockaddr *)&src_addr, @@ -1899,9 +1887,9 @@ static int dlm_tcp_listen_bind(struct socket *sock) int addr_len; /* Bind to our port */ - make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); + make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], - addr_len); + addr_len); } static const struct dlm_proto_ops dlm_tcp_ops = { @@ -1992,7 +1980,7 @@ int dlm_lowcomms_start(void) error = work_start(); if (error) - goto fail_local; + goto fail; dlm_allow_conn = 1; @@ -2022,8 +2010,6 @@ fail_listen: fail_proto_ops: dlm_allow_conn = 0; work_stop(); -fail_local: - deinit_local(); fail: return error; } -- GitLab From 82761993d429755e1bdfa93b0401f4cf2ee6aacf Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Tue, 24 Jan 2023 11:44:44 +0100 Subject: [PATCH 0044/2290] mtd: parsers: ofpart: add workaround for #size-cells 0 commit 84549c816dc317f012798e706e58669b3b013604 upstream. Add a mechanism to handle the case in which partitions are present as direct child of the nand controller node and #size-cells is set to <0>. This could happen if the nand-controller node in the DTS is supposed to have #size-cells set to 0, but for some historical reason/bug it was set to 1 in the past, and the firmware (e.g. U-Boot) is adding the partition as direct children of the nand-controller defaulting to #size-cells being to 1. This prevents a real boot failure on colibri-imx7 that happened during v6.1 development cycles. Link: https://lore.kernel.org/all/Y4dgBTGNWpM6SQXI@francesco-nb.int.toradex.com/ Link: https://lore.kernel.org/all/20221202071900.1143950-1-francesco@dolcini.it/ Signed-off-by: Francesco Dolcini Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230124104444.330913-1-francesco@dolcini.it Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/parsers/ofpart_core.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c index 192190c42fc84..e7b8e9d0a9103 100644 --- a/drivers/mtd/parsers/ofpart_core.c +++ b/drivers/mtd/parsers/ofpart_core.c @@ -122,6 +122,25 @@ static int parse_fixed_partitions(struct mtd_info *master, a_cells = of_n_addr_cells(pp); s_cells = of_n_size_cells(pp); + if (!dedicated && s_cells == 0) { + /* + * This is a ugly workaround to not create + * regression on devices that are still creating + * partitions as direct children of the nand controller. + * This can happen in case the nand controller node has + * #size-cells equal to 0 and the firmware (e.g. + * U-Boot) just add the partitions there assuming + * 32-bit addressing. + * + * If you get this warning your firmware and/or DTS + * should be really fixed. + * + * This is working only for devices smaller than 4GiB. + */ + pr_warn("%s: ofpart partition %pOF (%pOF) #size-cells is wrongly set to <0>, assuming <1> for parsing partitions.\n", + master->name, pp, mtd_node); + s_cells = 1; + } if (len / 4 != a_cells + s_cells) { pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n", master->name, pp, -- GitLab From b8259a502320ddaac79a5618db4cc4afed12aca8 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Wed, 24 Jan 2024 16:02:39 +0300 Subject: [PATCH 0045/2290] ALSA: usb-audio: Add delay quirk for MOTU M Series 2nd revision commit d915a6850e27efb383cd4400caadfe47792623df upstream. Audio control requests that sets sampling frequency sometimes fail on this card. Adding delay between control messages eliminates that problem. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217601 Cc: Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240124130239.358298-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 33380cad3a735..88d7f68ed27c2 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2071,6 +2071,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ -- GitLab From 2552f6b1bd645759c9190aa93770f941dadf0f21 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 23 Jan 2024 09:49:35 +0100 Subject: [PATCH 0046/2290] ALSA: usb-audio: Add a quirk for Yamaha YIT-W12TX transmitter commit a969210066054ea109d8b7aff29a9b1c98776841 upstream. The device fails to initialize otherwise, giving the following error: [ 3676.671641] usb 2-1.1: 1:1: cannot get freq at ep 0x1 Signed-off-by: Julian Sikorski Cc: Link: https://lore.kernel.org/r/20240123084935.2745-1-belegdol+github@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 88d7f68ed27c2..ec7049576353b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2029,6 +2029,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ -- GitLab From 39fbca505fe6cae8702a7b5afc8dff818f60e6b0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 24 Jan 2024 15:15:24 +0000 Subject: [PATCH 0047/2290] ALSA: usb-audio: add quirk for RODE NT-USB+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7822baa844a87cbb93308c1032c3d47d4079bb8a upstream. The RODE NT-USB+ is marketed as a professional usb microphone, however the usb audio interface is a mess: [ 1.130977] usb 1-5: new full-speed USB device number 2 using xhci_hcd [ 1.503906] usb 1-5: config 1 has an invalid interface number: 5 but max is 4 [ 1.503912] usb 1-5: config 1 has no interface number 4 [ 1.519689] usb 1-5: New USB device found, idVendor=19f7, idProduct=0035, bcdDevice= 1.09 [ 1.519695] usb 1-5: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 1.519697] usb 1-5: Product: RØDE NT-USB+ [ 1.519699] usb 1-5: Manufacturer: RØDE [ 1.519700] usb 1-5: SerialNumber: 1D773A1A [ 8.327495] usb 1-5: 1:1: cannot get freq at ep 0x82 [ 8.344500] usb 1-5: 1:2: cannot get freq at ep 0x82 [ 8.365499] usb 1-5: 2:1: cannot get freq at ep 0x2 Add QUIRK_FLAG_GET_SAMPLE_RATE to work around the broken sample rate get. I have asked Rode support to fix it, but they show no interest. Signed-off-by: Sean Young Cc: Link: https://lore.kernel.org/r/20240124151524.23314-1-sean@mess.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ec7049576353b..520a4e34609fc 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2181,6 +2181,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ -- GitLab From 234099ab7f8ab0e8a6ba9df24931037bbd0fc21e Mon Sep 17 00:00:00 2001 From: JackBB Wu Date: Tue, 23 Jan 2024 17:39:48 +0800 Subject: [PATCH 0048/2290] USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e commit 129690fb229a20b6e563a77a2c85266acecf20bc upstream. Add support for Dell DW5826e with USB-id 0x413c:0x8217 & 0x413c:0x8218. It is 0x413c:0x8217 T: Bus=02 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=413c ProdID=8217 Rev= 5.04 S: Manufacturer=DELL S: Product=COMPAL Electronics EXM-G1A S: SerialNumber=359302940050401 C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=qcserial E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=qcserial E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=qcserial E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms It is 0x413c:0x8218 T: Bus=02 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=413c ProdID=8218 Rev= 0.00 S: Manufacturer=DELL S: Product=COMPAL Electronics EXM-G1A S: SerialNumber=359302940050401 C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: JackBB Wu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index b1e844bf31f81..703a9c5635573 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x8217)}, /* Dell Wireless DW5826e */ + {DEVICE_SWI(0x413c, 0x8218)}, /* Dell Wireless DW5826e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ -- GitLab From 36ef5b7b4f0ac2ba927139ae9279ac9afa910a88 Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Wed, 31 Jan 2024 17:12:24 +0800 Subject: [PATCH 0049/2290] USB: serial: option: add Fibocom FM101-GL variant commit b4a1f4eaf1d798066affc6ad040f76eb1a16e1c9 upstream. Update the USB serial option driver support for the Fibocom FM101-GL LTE modules as there are actually several different variants. - VID:PID 2cb7:01a3, FM101-GL are laptop M.2 cards (with MBIM interfaces for /Linux/Chrome OS) 0x01a3:mbim,gnss Here are the outputs of usb-devices: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=01a3 Rev=05.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=5ccd5cd4 C: #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Signed-off-by: Puliang Lu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4adef92598709..c0a0cca65437f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2269,6 +2269,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ -- GitLab From 041cb58f54f53f84214a6ce5c902bd420f57863e Mon Sep 17 00:00:00 2001 From: Leonard Dallmayr Date: Fri, 5 Jan 2024 13:35:51 +0100 Subject: [PATCH 0050/2290] USB: serial: cp210x: add ID for IMST iM871A-USB commit 12b17b4eb82a41977eb848048137b5908d52845c upstream. The device IMST USB-Stick for Smart Meter is a rebranded IMST iM871A-USB Wireless M-Bus USB-adapter. It is used to read wireless water, gas and electricity meters. Signed-off-by: Leonard Dallmayr Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f1d7a5a863aa4..b3e60b3847941 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ -- GitLab From f2cf6db28545e999d7ba906c4411e433e27779fb Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 16 Jan 2024 11:28:15 +0530 Subject: [PATCH 0051/2290] usb: dwc3: host: Set XHCI_SG_TRB_CACHE_SIZE_QUIRK commit 817349b6d26aadd8b38283a05ce0bab106b4c765 upstream. Upstream commit bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") introduced a new quirk in XHCI which fixes XHC timeout, which was seen on synopsys XHCs while using SG buffers. But the support for this quirk isn't present in the DWC3 layer. We will encounter this XHCI timeout/hung issue if we run iperf loopback tests using RTL8156 ethernet adaptor on DWC3 targets with scatter-gather enabled. This gets resolved after enabling the XHCI_SG_TRB_CACHE_SIZE_QUIRK. This patch enables it using the xhci device property since its needed for DWC3 controller. In Synopsys DWC3 databook, Table 9-3: xHCI Debug Capability Limitations Chained TRBs greater than TRB cache size: The debug capability driver must not create a multi-TRB TD that describes smaller than a 1K packet that spreads across 8 or more TRBs on either the IN TR or the OUT TR. Cc: stable@vger.kernel.org #5.11 Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240116055816.1169821-2-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/host.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index f6f13e7f1ba14..f4d8e80c4c347 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -66,7 +66,7 @@ out: int dwc3_host_init(struct dwc3 *dwc) { - struct property_entry props[4]; + struct property_entry props[5]; struct platform_device *xhci; int ret, irq; int prop_idx = 0; @@ -94,6 +94,8 @@ int dwc3_host_init(struct dwc3 *dwc) memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); + props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk"); + if (dwc->usb3_lpm_capable) props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable"); -- GitLab From 8b380ad970d0b0d3dff7451e5c3b540fb9f2e00c Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 16 Jan 2024 11:28:16 +0530 Subject: [PATCH 0052/2290] usb: host: xhci-plat: Add support for XHCI_SG_TRB_CACHE_SIZE_QUIRK commit 520b391e3e813c1dd142d1eebb3ccfa6d08c3995 upstream. Upstream commit bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") introduced a new quirk in XHCI which fixes XHC timeout, which was seen on synopsys XHCs while using SG buffers. Currently this quirk can only be set using xhci private data. But there are some drivers like dwc3/host.c which adds adds quirks using software node for xhci device. Hence set this xhci quirk by iterating over device properties. Cc: stable@vger.kernel.org # 5.11 Fixes: bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20240116055816.1169821-3-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index c9438dc56f5fc..b387d39bfb81d 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -301,6 +301,9 @@ static int xhci_plat_probe(struct platform_device *pdev) if (device_property_read_bool(tmpdev, "quirk-broken-port-ped")) xhci->quirks |= XHCI_BROKEN_PORT_PED; + if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk")) + xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK; + device_property_read_u32(tmpdev, "imod-interval-ns", &xhci->imod_interval); } -- GitLab From a94d303bea6bd8edc10aea9982e73c61293c5abe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Jan 2024 15:56:36 -0800 Subject: [PATCH 0053/2290] hrtimer: Report offline hrtimer enqueue commit dad6a09f3148257ac1773cd90934d721d68ab595 upstream. The hrtimers migration on CPU-down hotplug process has been moved earlier, before the CPU actually goes to die. This leaves a small window of opportunity to queue an hrtimer in a blind spot, leaving it ignored. For example a practical case has been reported with RCU waking up a SCHED_FIFO task right before the CPUHP_AP_IDLE_DEAD stage, queuing that way a sched/rt timer to the local offline CPU. Make sure such situations never go unnoticed and warn when that happens. Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Reported-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240129235646.3171983-4-boqun.feng@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/hrtimer.h | 4 +++- kernel/time/hrtimer.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f2044d5a652b5..254d4a898179c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -197,6 +197,7 @@ enum hrtimer_base_type { * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired + * @online: CPU is online from an hrtimers point of view * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote @@ -219,7 +220,8 @@ struct hrtimer_cpu_base { unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, - softirq_activated : 1; + softirq_activated : 1, + online : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5561dabc9b225..8e0aff1d1ea4f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1082,6 +1082,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, enum hrtimer_mode mode) { debug_activate(timer, mode); + WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; @@ -2180,6 +2181,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2247,6 +2249,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); + old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; -- GitLab From 315075ac739c7955dd30b2ae76db3b29555eb5d1 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 5 Dec 2023 17:36:01 +0100 Subject: [PATCH 0054/2290] Input: i8042 - fix strange behavior of touchpad on Clevo NS70PU commit a60e6c3918d20848906ffcdfcf72ca6a8cfbcf2e upstream. When closing the laptop lid with an external screen connected, the mouse pointer has a constant movement to the lower right corner. Opening the lid again stops this movement, but after that the touchpad does no longer register clicks. The touchpad is connected both via i2c-hid and PS/2, the predecessor of this device (NS70MU) has the same layout in this regard and also strange behaviour caused by the psmouse and the i2c-hid driver fighting over touchpad control. This fix is reusing the same workaround by just disabling the PS/2 aux port, that is only used by the touchpad, to give the i2c-hid driver the lone control over the touchpad. v2: Rebased on current master Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231205163602.16106-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index b585b1dab870e..cd45a65e17f2c 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1208,6 +1208,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), -- GitLab From 08249dc3d9c1a6a54e9ad04148d0def5f8deff2e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Jan 2024 17:07:23 +0100 Subject: [PATCH 0055/2290] Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID commit 683cd8259a9b883a51973511f860976db2550a6e upstream. After commit 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") the keyboard on Dell XPS 13 9350 / 9360 / 9370 models has stopped working after a suspend/resume. The problem appears to be that atkbd_probe() fails when called from atkbd_reconnect() on resume, which on systems where ATKBD_CMD_GETID is skipped can only happen by ATKBD_CMD_SETLEDS failing. ATKBD_CMD_SETLEDS failing because ATKBD_CMD_GETID was skipped is weird, but apparently that is what is happening. Fix this by also skipping ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID. Fixes: 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") Reported-by: Paul Menzel Closes: https://lore.kernel.org/linux-input/0aa4a61f-c939-46fe-a572-08022e8931c7@molgen.mpg.de/ Closes: https://bbs.archlinux.org/viewtopic.php?pid=2146300 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218424 Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2260517 Tested-by: Paul Menzel Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240126160724.13278-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/atkbd.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index c4d8caadec59e..661d6c8b059bf 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -792,7 +792,6 @@ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; - bool skip_getid; /* * Some systems, where the bit-twiddling when testing the io-lines of the @@ -806,6 +805,11 @@ static int atkbd_probe(struct atkbd *atkbd) "keyboard reset failed on %s\n", ps2dev->serio->phys); + if (atkbd_skip_getid(atkbd)) { + atkbd->id = 0xab83; + return 0; + } + /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or @@ -814,18 +818,17 @@ static int atkbd_probe(struct atkbd *atkbd) */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ - skip_getid = atkbd_skip_getid(atkbd); - if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { + if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* - * If the get ID command was skipped or failed, we check if we can at least set + * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; - atkbd->id = skip_getid ? 0xab83 : 0xabba; + atkbd->id = 0xabba; return 0; } -- GitLab From fbd77ce1d1748239bb60317ddae3b75e18d51cf2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Feb 2024 06:42:36 -0700 Subject: [PATCH 0056/2290] io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers commit 72bd80252feeb3bef8724230ee15d9f7ab541c6e upstream. If we use IORING_OP_RECV with provided buffers and pass in '0' as the length of the request, the length is retrieved from the selected buffer. If MSG_WAITALL is also set and we get a short receive, then we may hit the retry path which decrements sr->len and increments the buffer for a retry. However, the length is still zero at this point, which means that sr->len now becomes huge and import_ubuf() will cap it to MAX_RW_COUNT and subsequently return -EFAULT for the range as a whole. Fix this by always assigning sr->len once the buffer has been selected. Cc: stable@vger.kernel.org Fixes: 7ba89d2af17a ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index 67f09a40bcb21..618ab186fe036 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -875,6 +875,7 @@ retry_multishot: if (!buf) return -ENOBUFS; sr->buf = buf; + sr->len = len; } ret = import_single_range(ITER_DEST, sr->buf, len, &iov, &msg.msg_iter); -- GitLab From d8712c6c6a384e23adeef4b0e03b10b8d242fd07 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Feb 2024 15:44:48 +0100 Subject: [PATCH 0057/2290] Revert "ASoC: amd: Add new dmi entries for acp5x platform" This reverts commit 48ad42cd95acc2da22b38497f22d53cb433863a1 which is commit c3ab23a10771bbe06300e5374efa809789c65455 upstream. Link: https://lore.kernel.org/r/CAD_nV8BG0t7US=+C28kQOR==712MPfZ9m-fuKksgoZCgrEByCw@mail.gmail.com Reported-by: Ted Chang Cc: Takashi Iwai Cc: Venkata Prasad Potturu Cc: Mark Brown Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/acp-config.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c index 9ee71a99a0871..0932473b63945 100644 --- a/sound/soc/amd/acp-config.c +++ b/sound/soc/amd/acp-config.c @@ -3,7 +3,7 @@ // This file is provided under a dual BSD/GPLv2 license. When using or // redistributing this file, you may do so under either license. // -// Copyright(c) 2021, 2023 Advanced Micro Devices, Inc. +// Copyright(c) 2021 Advanced Micro Devices, Inc. // // Authors: Ajit Kumar Pandey // @@ -35,19 +35,6 @@ static const struct config_entry config_table[] = { {} }, }, - { - .flags = FLAG_AMD_LEGACY, - .device = ACP_PCI_DEV_ID, - .dmi_table = (const struct dmi_system_id []) { - { - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Valve"), - DMI_MATCH(DMI_PRODUCT_NAME, "Jupiter"), - }, - }, - {} - }, - }, { .flags = FLAG_AMD_SOF, .device = ACP_PCI_DEV_ID, -- GitLab From 4675661672e3730597babf97c4e9593a775c8917 Mon Sep 17 00:00:00 2001 From: Prathu Baronia Date: Mon, 22 May 2023 14:20:19 +0530 Subject: [PATCH 0058/2290] vhost: use kzalloc() instead of kmalloc() followed by memset() commit 4d8df0f5f79f747d75a7d356d9b9ea40a4e4c8a9 upstream. Use kzalloc() to allocate new zeroed out msg node instead of memsetting a node allocated with kmalloc(). Signed-off-by: Prathu Baronia Message-Id: <20230522085019.42914-1-prathubaronia2011@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: Ajay Kaher Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2eea080298812..61c72e62abd49 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2588,12 +2588,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; -- GitLab From 9f74b3d7183aff25589728c484049991f187cb01 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:43 -0600 Subject: [PATCH 0059/2290] RDMA/irdma: Fix support for 64k pages commit 03769f72d66edab82484449ed594cb6b00ae0223 upstream. Virtual QP and CQ require a 4K HW page size but the driver passes PAGE_SIZE to ib_umem_find_best_pgsz() instead. Fix this by using the appropriate 4k value in the bitmap passed to ib_umem_find_best_pgsz(). Fixes: 693a5386eff0 ("RDMA/irdma: Split mr alloc and free into new functions") Link: https://lore.kernel.org/r/20231129202143.1434-4-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 447e1bcc82a32..4859b99d54fc2 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2825,7 +2825,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwmr->ibmr.iova = virt; - iwmr->page_size = PAGE_SIZE; + iwmr->page_size = SZ_4K; if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, -- GitLab From cf3d57ad6ff8b566deba3544b9ad3384781fb604 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 12 Jun 2023 11:01:16 +0800 Subject: [PATCH 0060/2290] f2fs: add helper to check compression level commit c571fbb5b59a3741e48014faa92c2f14bc59fe50 upstream. This patch adds a helper function to check if compression level is valid. Meanwhile, this patch fixes a reported issue [1]: The issue is easily reproducible by: 1. dd if=/dev/zero of=test.img count=100 bs=1M 2. mkfs.f2fs -f -O compression,extra_attr ./test.img 3. mount -t f2fs -o compress_algorithm=zstd:6,compress_chksum,atgc,gc_merge,lazytime ./test.img /mnt resulting in [ 60.789982] F2FS-fs (loop0): invalid zstd compress level: 6 A bugzilla report has been submitted in https://bugzilla.kernel.org/show_bug.cgi?id=218471 [1] https://lore.kernel.org/lkml/ZcWDOjKEnPDxZ0Or@google.com/T/ The root cause is commit 00e120b5e4b5 ("f2fs: assign default compression level") tries to check low boundary of compress level w/ zstd_min_clevel(), however, since commit e0c1b49f5b67 ("lib: zstd: Upgrade to latest upstream zstd version 1.4.10"), zstd supports negative compress level, it cast type for negative value returned from zstd_min_clevel() to unsigned int in below check condition, result in repored issue. if (level < zstd_min_clevel() || ... This patch fixes this issue by casting type for level to int before comparison. Fixes: 00e120b5e4b5 ("f2fs: assign default compression level") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/compress.c | 27 +++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 4 ++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3d9f6495a4db4..967262c37da52 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -55,6 +55,7 @@ struct f2fs_compress_ops { int (*init_decompress_ctx)(struct decompress_io_ctx *dic); void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); + bool (*is_level_valid)(int level); }; static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -322,11 +323,21 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool lz4_is_level_valid(int lvl) +{ +#ifdef CONFIG_F2FS_FS_LZ4HC + return !lvl || (lvl >= LZ4HC_MIN_CLEVEL && lvl <= LZ4HC_MAX_CLEVEL); +#else + return lvl == 0; +#endif +} + static const struct f2fs_compress_ops f2fs_lz4_ops = { .init_compress_ctx = lz4_init_compress_ctx, .destroy_compress_ctx = lz4_destroy_compress_ctx, .compress_pages = lz4_compress_pages, .decompress_pages = lz4_decompress_pages, + .is_level_valid = lz4_is_level_valid, }; #endif @@ -490,6 +501,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool zstd_is_level_valid(int lvl) +{ + return lvl >= zstd_min_clevel() && lvl <= zstd_max_clevel(); +} + static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_compress_ctx = zstd_init_compress_ctx, .destroy_compress_ctx = zstd_destroy_compress_ctx, @@ -497,6 +513,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_decompress_ctx = zstd_init_decompress_ctx, .destroy_decompress_ctx = zstd_destroy_decompress_ctx, .decompress_pages = zstd_decompress_pages, + .is_level_valid = zstd_is_level_valid, }; #endif @@ -555,6 +572,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } +bool f2fs_is_compress_level_valid(int alg, int lvl) +{ + const struct f2fs_compress_ops *cops = f2fs_cops[alg]; + + if (cops->is_level_valid) + return cops->is_level_valid(lvl); + + return lvl == 0; +} + static mempool_t *compress_page_pool; static int num_compress_pages = 512; module_param(num_compress_pages, uint, 0444); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5c76ba764b71f..e5a9498b89c06 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4219,6 +4219,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); +bool f2fs_is_compress_level_valid(int alg, int lvl); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); @@ -4283,6 +4284,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode) /* not support compression */ return false; } +static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } static inline struct page *f2fs_compress_control_page(struct page *page) { WARN_ON_ONCE(1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3805162dcef2b..0c0d0671febea 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -628,7 +628,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < LZ4HC_MIN_CLEVEL || level > LZ4HC_MAX_CLEVEL) { + if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { f2fs_info(sbi, "invalid lz4hc compress level: %d", level); return -EINVAL; } @@ -666,7 +666,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < zstd_min_clevel() || level > zstd_max_clevel()) { + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; } -- GitLab From 492e0aba08848fedf2a3c6e3efb4836fd3d4fff6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Jan 2023 07:51:07 -0700 Subject: [PATCH 0061/2290] block: treat poll queue enter similarly to timeouts commit 33391eecd63158536fb5257fee5be3a3bdc30e3c upstream. We ran into an issue where a production workload would randomly grind to a halt and not continue until the pending IO had timed out. This turned out to be a complicated interaction between queue freezing and polled IO: 1) You have an application that does polled IO. At any point in time, there may be polled IO pending. 2) You have a monitoring application that issues a passthrough command, which is marked with side effects such that it needs to freeze the queue. 3) Passthrough command is started, which calls blk_freeze_queue_start() on the device. At this point the queue is marked frozen, and any attempt to enter the queue will fail (for non-blocking) or block. 4) Now the driver calls blk_mq_freeze_queue_wait(), which will return when the queue is quiesced and pending IO has completed. 5) The pending IO is polled IO, but any attempt to poll IO through the normal iocb_bio_iopoll() -> bio_poll() will fail when it gets to bio_queue_enter() as the queue is frozen. Rather than poll and complete IO, the polling threads will sit in a tight loop attempting to poll, but failing to enter the queue to do so. The end result is that progress for either application will be stalled until all pending polled IO has timed out. This causes obvious huge latency issues for the application doing polled IO, but also long delays for passthrough command. Fix this by treating queue enter for polled IO just like we do for timeouts. This allows quick quiesce of the queue as we still poll and complete this IO, while still disallowing queueing up new IO. Reviewed-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6eaf2b0ad7cca..aefdf07bdc2cf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -864,7 +864,16 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) */ blk_flush_plug(current->plug, false); - if (bio_queue_enter(bio)) + /* + * We need to be able to enter a frozen queue, similar to how + * timeouts also need to do that. If that is blocked, then we can + * have pending IO when a queue freeze is started, and then the + * wait for the freeze to finish will wait for polled requests to + * timeout as the poller is preventer from entering the queue and + * completing them. As long as we prevent new IO from being queued, + * that should be all that matters. + */ + if (!percpu_ref_tryget(&q->q_usage_counter)) return 0; if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); -- GitLab From 499e6e9f0737ee776059be54c8f047d2c67e8b0d Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Mon, 22 Jan 2024 18:23:50 +0100 Subject: [PATCH 0062/2290] clocksource: Skip watchdog check for large watchdog intervals commit 644649553508b9bacf0fc7a5bdc4f9e0165576a5 upstream. There have been reports of the watchdog marking clocksources unstable on machines with 8 NUMA nodes: clocksource: timekeeping watchdog on CPU373: Marking clocksource 'tsc' as unstable because the skew is too large: clocksource: 'hpet' wd_nsec: 14523447520 clocksource: 'tsc' cs_nsec: 14524115132 The measured clocksource skew - the absolute difference between cs_nsec and wd_nsec - was 668 microseconds: cs_nsec - wd_nsec = 14524115132 - 14523447520 = 667612 The kernel used 200 microseconds for the uncertainty_margin of both the clocksource and watchdog, resulting in a threshold of 400 microseconds (the md variable). Both the cs_nsec and the wd_nsec value indicate that the readout interval was circa 14.5 seconds. The observed behaviour is that watchdog checks failed for large readout intervals on 8 NUMA node machines. This indicates that the size of the skew was directly proportinal to the length of the readout interval on those machines. The measured clocksource skew, 668 microseconds, was evaluated against a threshold (the md variable) that is suited for readout intervals of roughly WATCHDOG_INTERVAL, i.e. HZ >> 1, which is 0.5 second. The intention of 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") was to tighten the threshold for evaluating skew and set the lower bound for the uncertainty_margin of clocksources to twice WATCHDOG_MAX_SKEW. Later in c37e85c135ce ("clocksource: Loosen clocksource watchdog constraints"), the WATCHDOG_MAX_SKEW constant was increased to 125 microseconds to fit the limit of NTP, which is able to use a clocksource that suffers from up to 500 microseconds of skew per second. Both the TSC and the HPET use default uncertainty_margin. When the readout interval gets stretched the default uncertainty_margin is no longer a suitable lower bound for evaluating skew - it imposes a limit that is far stricter than the skew with which NTP can deal. The root causes of the skew being directly proportinal to the length of the readout interval are: * the inaccuracy of the shift/mult pairs of clocksources and the watchdog * the conversion to nanoseconds is imprecise for large readout intervals Prevent this by skipping the current watchdog check if the readout interval exceeds 2 * WATCHDOG_INTERVAL. Considering the maximum readout interval of 2 * WATCHDOG_INTERVAL, the current default uncertainty margin (of the TSC and HPET) corresponds to a limit on clocksource skew of 250 ppm (microseconds of skew per second). To keep the limit imposed by NTP (500 microseconds of skew per second) for all possible readout intervals, the margins would have to be scaled so that the threshold value is proportional to the length of the actual readout interval. As for why the readout interval may get stretched: Since the watchdog is executed in softirq context the expiration of the watchdog timer can get severely delayed on account of a ksoftirqd thread not getting to run in a timely manner. Surely, a system with such belated softirq execution is not working well and the scheduling issue should be looked into but the clocksource watchdog should be able to deal with it accordingly. Fixes: 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") Suggested-by: Feng Tang Signed-off-by: Jiri Wiesner Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Reviewed-by: Feng Tang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122172350.GA740@incl Signed-off-by: Greg Kroah-Hartman --- kernel/time/clocksource.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c90e710d537f..cc6db3bce1b2f 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -126,6 +126,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -144,6 +145,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating); * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) static void clocksource_watchdog_work(struct work_struct *work) { @@ -396,8 +398,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -467,6 +469,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { -- GitLab From 06040fadbfef65bc6ce86c4d7ac46c712e592477 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 8 Feb 2024 09:48:27 +0000 Subject: [PATCH 0063/2290] net: stmmac: xgmac: use #define for string constants commit 1692b9775e745f84b69dc8ad0075b0855a43db4e upstream. The cited commit introduces and uses the string constants dpp_tx_err and dpp_rx_err. These are assigned to constant fields of the array dwxgmac3_error_desc. It has been reported that on GCC 6 and 7.5.0 this results in warnings such as: .../dwxgmac2_core.c:836:20: error: initialiser element is not constant { true, "TDPES0", dpp_tx_err }, I have been able to reproduce this using: GCC 7.5.0, 8.4.0, 9.4.0 and 10.5.0. But not GCC 13.2.0. So it seems this effects older compilers but not newer ones. As Jon points out in his report, the minimum compiler supported by the kernel is GCC 5.1, so it does seem that this ought to be fixed. It is not clear to me what combination of 'const', if any, would address this problem. So this patch takes of using #defines for the string constants Compile tested only. Fixes: 46eba193d04f ("net: stmmac: xgmac: fix handling of DPP safety error for DMA channels") Reported-by: Jon Hunter Closes: https://lore.kernel.org/netdev/c25eb595-8d91-40ea-9f52-efa15ebafdbc@nvidia.com/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402081135.lAxxBXHk-lkp@intel.com/ Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240208-xgmac-const-v1-1-e69a1eeabfc8@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index c24cd019460a7..b1ab6ed0027eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -789,41 +789,42 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; -static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error"; -static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error"; +#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error" +#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error" + static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { - { true, "TDPES0", dpp_tx_err }, - { true, "TDPES1", dpp_tx_err }, - { true, "TDPES2", dpp_tx_err }, - { true, "TDPES3", dpp_tx_err }, - { true, "TDPES4", dpp_tx_err }, - { true, "TDPES5", dpp_tx_err }, - { true, "TDPES6", dpp_tx_err }, - { true, "TDPES7", dpp_tx_err }, - { true, "TDPES8", dpp_tx_err }, - { true, "TDPES9", dpp_tx_err }, - { true, "TDPES10", dpp_tx_err }, - { true, "TDPES11", dpp_tx_err }, - { true, "TDPES12", dpp_tx_err }, - { true, "TDPES13", dpp_tx_err }, - { true, "TDPES14", dpp_tx_err }, - { true, "TDPES15", dpp_tx_err }, - { true, "RDPES0", dpp_rx_err }, - { true, "RDPES1", dpp_rx_err }, - { true, "RDPES2", dpp_rx_err }, - { true, "RDPES3", dpp_rx_err }, - { true, "RDPES4", dpp_rx_err }, - { true, "RDPES5", dpp_rx_err }, - { true, "RDPES6", dpp_rx_err }, - { true, "RDPES7", dpp_rx_err }, - { true, "RDPES8", dpp_rx_err }, - { true, "RDPES9", dpp_rx_err }, - { true, "RDPES10", dpp_rx_err }, - { true, "RDPES11", dpp_rx_err }, - { true, "RDPES12", dpp_rx_err }, - { true, "RDPES13", dpp_rx_err }, - { true, "RDPES14", dpp_rx_err }, - { true, "RDPES15", dpp_rx_err }, + { true, "TDPES0", DPP_TX_ERR }, + { true, "TDPES1", DPP_TX_ERR }, + { true, "TDPES2", DPP_TX_ERR }, + { true, "TDPES3", DPP_TX_ERR }, + { true, "TDPES4", DPP_TX_ERR }, + { true, "TDPES5", DPP_TX_ERR }, + { true, "TDPES6", DPP_TX_ERR }, + { true, "TDPES7", DPP_TX_ERR }, + { true, "TDPES8", DPP_TX_ERR }, + { true, "TDPES9", DPP_TX_ERR }, + { true, "TDPES10", DPP_TX_ERR }, + { true, "TDPES11", DPP_TX_ERR }, + { true, "TDPES12", DPP_TX_ERR }, + { true, "TDPES13", DPP_TX_ERR }, + { true, "TDPES14", DPP_TX_ERR }, + { true, "TDPES15", DPP_TX_ERR }, + { true, "RDPES0", DPP_RX_ERR }, + { true, "RDPES1", DPP_RX_ERR }, + { true, "RDPES2", DPP_RX_ERR }, + { true, "RDPES3", DPP_RX_ERR }, + { true, "RDPES4", DPP_RX_ERR }, + { true, "RDPES5", DPP_RX_ERR }, + { true, "RDPES6", DPP_RX_ERR }, + { true, "RDPES7", DPP_RX_ERR }, + { true, "RDPES8", DPP_RX_ERR }, + { true, "RDPES9", DPP_RX_ERR }, + { true, "RDPES10", DPP_RX_ERR }, + { true, "RDPES11", DPP_RX_ERR }, + { true, "RDPES12", DPP_RX_ERR }, + { true, "RDPES13", DPP_RX_ERR }, + { true, "RDPES14", DPP_RX_ERR }, + { true, "RDPES15", DPP_RX_ERR }, }; static void dwxgmac3_handle_dma_err(struct net_device *ndev, -- GitLab From 7b430fb92440794042ac59f0e91e4127f6275dbb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Jan 2024 16:53:07 +0100 Subject: [PATCH 0064/2290] ALSA: usb-audio: Sort quirk table entries commit 668abe6dc7b61941fa5c724c06797efb0b87f070 upstream. The quirk table entries should be put in the USB ID order, but some entries have been put in random places. Re-sort them. Fixes: bf990c102319 ("ALSA: usb-audio: add quirk to fix Hamedal C20 disconnect issue") Fixes: fd28941cff1c ("ALSA: usb-audio: Add new quirk FIXED_RATE for JBL Quantum810 Wireless") Fixes: dfd5fe19db7d ("ALSA: usb-audio: Add FIXED_RATE quirk for JBL Quantum610 Wireless") Fixes: 4a63e68a2951 ("ALSA: usb-audio: Fix microphone sound on Nexigo webcam.") Fixes: 7822baa844a8 ("ALSA: usb-audio: add quirk for RODE NT-USB+") Fixes: 4fb7c24f69c4 ("ALSA: usb-audio: Add quirk for Fiero SC-01") Fixes: 2307a0e1ca0b ("ALSA: usb-audio: Add quirk for Fiero SC-01 (fw v1.0.0)") Link: https://lore.kernel.org/r/20240124155307.16996-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 520a4e34609fc..b8a474a2e4d59 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2035,6 +2035,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M), + DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ + QUIRK_FLAG_IFACE_SKIP_CLOSE), DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */ QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ @@ -2081,8 +2083,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ + QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ + QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */ @@ -2115,6 +2123,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */ @@ -2157,6 +2169,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ @@ -2165,24 +2183,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), - DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ - QUIRK_FLAG_IFACE_SKIP_CLOSE), - DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ - QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ -- GitLab From d89a80e4827d8bad1c1eeb9c050b08f6ac5b0e68 Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Sat, 3 Feb 2024 13:31:33 +0800 Subject: [PATCH 0065/2290] net: stmmac: xgmac: fix a typo of register name in DPP safety handling commit 1ce2654d87e2fb91fea83b288bd9b2641045e42a upstream. DDPP is copied from Synopsys Data book: DDPP: Disable Data path Parity Protection. When it is 0x0, Data path Parity Protection is enabled. When it is 0x1, Data path Parity Protection is disabled. The macro name should be XGMAC_DPP_DISABLE. Fixes: 46eba193d04f ("net: stmmac: xgmac: fix handling of DPP safety error for DMA channels") Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20240203053133.1129236-1-0x1207@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index e67a880ebf645..8748c37e9dac9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -283,7 +283,7 @@ #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc #define XGMAC_MTL_DPP_CONTROL 0x000010e0 -#define XGMAC_DDPP_DISABLE BIT(0) +#define XGMAC_DPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index b1ab6ed0027eb..ec1616ffbfa7a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -886,7 +886,7 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, /* 5. Enable Data Path Parity Protection */ value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); /* already enabled by default, explicit enable it again */ - value &= ~XGMAC_DDPP_DISABLE; + value &= ~XGMAC_DPP_DISABLE; writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); return 0; -- GitLab From 1296c110c5a0b45a8fcf58e7d18bc5da61a565cb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Feb 2024 18:49:51 +0100 Subject: [PATCH 0066/2290] netfilter: nft_set_rbtree: skip end interval element from gc commit 60c0c230c6f046da536d3df8b39a20b9a9fd6af0 upstream. rbtree lazy gc on insert might collect an end interval element that has been just added in this transactions, skip end interval elements that are not yet active. Fixes: f718863aca46 ("netfilter: nft_set_rbtree: fix overlap expiration walk") Cc: stable@vger.kernel.org Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_set_rbtree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a71e0..5bf5572e945cc 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -235,7 +235,7 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -254,7 +254,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -365,7 +365,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); -- GitLab From 8b4118fabd6eb75fed19483b04dab3a036886489 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 16 Feb 2024 19:06:32 +0100 Subject: [PATCH 0067/2290] Linux 6.1.78 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240213171844.702064831@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Pavel Machek (CIP) Tested-by: Kelsey Steele Tested-by: Florian Fainelli Tested-by: Allen Pais Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Yann Sionneau Tested-by: Salvatore Bonaccorso Tested-by: Sven Joachim Link: https://lore.kernel.org/r/20240214142941.551330912@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: kernelci.org bot Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Mateusz Jończyk Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5598d90093f5..e93554269e474 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 77 +SUBLEVEL = 78 EXTRAVERSION = NAME = Curry Ramen -- GitLab From f70efe54b97e95c369ab3f46cdbed8b5608e36d7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Feb 2024 12:39:31 -0800 Subject: [PATCH 0068/2290] work around gcc bugs with 'asm goto' with outputs commit 68fb3ca0e408e00db1c3f8fccdfa19e274c033be upstream. We've had issues with gcc and 'asm goto' before, and we created a 'asm_volatile_goto()' macro for that in the past: see commits 3f0116c3238a ("compiler/gcc4: Add quirk for 'asm goto' miscompilation bug") and a9f180345f53 ("compiler/gcc4: Make quirk for asm_volatile_goto() unconditional"). Then, much later, we ended up removing the workaround in commit 43c249ea0b1e ("compiler-gcc.h: remove ancient workaround for gcc PR 58670") because we no longer supported building the kernel with the affected gcc versions, but we left the macro uses around. Now, Sean Christopherson reports a new version of a very similar problem, which is fixed by re-applying that ancient workaround. But the problem in question is limited to only the 'asm goto with outputs' cases, so instead of re-introducing the old workaround as-is, let's rename and limit the workaround to just that much less common case. It looks like there are at least two separate issues that all hit in this area: (a) some versions of gcc don't mark the asm goto as 'volatile' when it has outputs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 which is easy to work around by just adding the 'volatile' by hand. (b) Internal compiler errors: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 which are worked around by adding the extra empty 'asm' as a barrier, as in the original workaround. but the problem Sean sees may be a third thing since it involves bad code generation (not an ICE) even with the manually added 'volatile'. The same old workaround works for this case, even if this feels a bit like voodoo programming and may only be hiding the issue. Reported-and-tested-by: Sean Christopherson Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Cc: Nick Desaulniers Cc: Uros Bizjak Cc: Jakub Jelinek Cc: Andrew Pinski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/jump_label.h | 4 ++-- arch/arm/include/asm/jump_label.h | 4 ++-- arch/arm64/include/asm/alternative-macros.h | 4 ++-- arch/arm64/include/asm/jump_label.h | 4 ++-- arch/csky/include/asm/jump_label.h | 4 ++-- arch/mips/include/asm/jump_label.h | 4 ++-- arch/parisc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/bug.h | 2 +- arch/powerpc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/uaccess.h | 8 ++++---- arch/powerpc/kernel/irq_64.c | 2 +- arch/riscv/include/asm/jump_label.h | 4 ++-- arch/s390/include/asm/jump_label.h | 4 ++-- arch/sparc/include/asm/jump_label.h | 4 ++-- arch/um/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +++--- arch/x86/include/asm/rmwcc.h | 2 +- arch/x86/include/asm/uaccess.h | 10 +++++----- arch/x86/include/asm/virtext.h | 12 ++++++------ arch/x86/kvm/svm/svm_ops.h | 6 +++--- arch/x86/kvm/vmx/vmx.c | 8 ++++---- arch/x86/kvm/vmx/vmx_ops.h | 6 +++--- arch/xtensa/include/asm/jump_label.h | 4 ++-- include/linux/compiler-gcc.h | 19 +++++++++++++++++++ include/linux/compiler_types.h | 4 ++-- net/netfilter/nft_set_pipapo_avx2.c | 2 +- samples/bpf/asm_goto_workaround.h | 8 ++++---- tools/arch/x86/include/asm/rmwcc.h | 2 +- tools/include/linux/compiler_types.h | 4 ++-- 30 files changed, 86 insertions(+), 67 deletions(-) diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index 9d96180797396..a339223d9e052 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -31,7 +31,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "nop \n" ".pushsection __jump_table, \"aw\" \n" @@ -47,7 +47,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "b %l[l_yes] \n" ".pushsection __jump_table, \"aw\" \n" diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc03..e4eb54f6cd9fe 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -11,7 +11,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(nop) "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -25,7 +25,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 3622e9f4fb442..51738c56e96cd 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -229,7 +229,7 @@ alternative_has_feature_likely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : : [feature] "i" (feature) @@ -247,7 +247,7 @@ alternative_has_feature_unlikely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) : : [feature] "i" (feature) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index cea441b6aa5dc..b5bd3c38a01b2 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" @@ -35,7 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h index 98a3f4b168bd2..ef2e37a10a0fe 100644 --- a/arch/csky/include/asm/jump_label.h +++ b/arch/csky/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop32 \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" @@ -29,7 +29,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: bsr32 %l[label] \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index c5c6864e64bc4..405c85173f2c1 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -36,7 +36,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + asm goto("1:\t" B_INSN " 2f\n\t" "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" @@ -50,7 +50,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" + asm goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 94428798b6aa6..317ebc5edc9fe 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" @@ -29,7 +29,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a4736355c24..20d5052e22925 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -74,7 +74,7 @@ ##__VA_ARGS__) #define WARN_ENTRY(insn, flags, label, ...) \ - asm_volatile_goto( \ + asm goto( \ "1: " insn "\n" \ EX_TABLE(1b, %l[label]) \ _EMIT_BUG_ENTRY \ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 93ce3ec253877..2f2a86ed2280a 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" @@ -32,7 +32,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 3ddc65c63a49e..45d4c9cf3f3a2 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -72,7 +72,7 @@ __pu_failed: \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -85,7 +85,7 @@ __pu_failed: \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ @@ -132,7 +132,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -145,7 +145,7 @@ do { \ __get_user_asm_goto(x, addr, label, "ld") #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: lwz%X1 %0, %1\n" \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 9dc0ad3c533a8..5a6e44e4d36f5 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -230,7 +230,7 @@ again: * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 14a5ea8d8ef0f..4a35d787c0191 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 895f774bbcc55..bf78cf381dfcd 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -25,7 +25,7 @@ */ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 0,%l[label]\n" + asm goto("0: brcl 0,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 15,%l[label]\n" + asm goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 94eb529dcb776..2718cbea826a7 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -10,7 +10,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" @@ -26,7 +26,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes]\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 4b6d1b526bc12..66fe06db872f0 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" + asm goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce0c8f7d32186..f835b328ba24f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto( + asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 071572e23d3a0..cbbef32517f00 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -24,7 +24,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); @@ -38,7 +38,7 @@ l_yes: static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); @@ -52,7 +52,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 7fa6112164172..1919ccf493cd1 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -18,7 +18,7 @@ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + asm goto (fullop "; j" #cc " %l[cc_label]" \ : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ if (0) { \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 6ca0c661cb637..c638535eedd55 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -155,7 +155,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -317,7 +317,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ @@ -397,7 +397,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -416,7 +416,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -499,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_goto(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 6c2e3ff3cb28f..724ce44809ed2 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -43,9 +43,9 @@ static inline int cpu_has_vmx(void) */ static inline int cpu_vmxoff(void) { - asm_volatile_goto("1: vmxoff\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "cc", "memory" : fault); + asm goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); cr4_clear_bits(X86_CR4_VMXE); return 0; @@ -129,9 +129,9 @@ static inline void cpu_svm_disable(void) * case, GIF must already be set, otherwise the NMI would have * been blocked, so just eat the fault. */ - asm_volatile_goto("1: stgi\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "memory" : fault); + asm goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); fault: wrmsrl(MSR_EFER, efer & ~EFER_SVME); } diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 36c8af87a707a..4e725854c63a1 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -8,7 +8,7 @@ #define svm_asm(insn, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) "\n\t" \ + asm goto("1: " __stringify(insn) "\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ ::: clobber : fault); \ return; \ @@ -18,7 +18,7 @@ fault: \ #define svm_asm1(insn, op1, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1 : clobber : fault); \ return; \ @@ -28,7 +28,7 @@ fault: \ #define svm_asm2(insn, op1, op2, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1, op2 : clobber : fault); \ return; \ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 98d732b9418f1..57c1374fdfd49 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2469,10 +2469,10 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) cr4_set_bits(X86_CR4_VMXE); - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" - _ASM_EXTABLE(1b, %l[fault]) - : : [vmxon_pointer] "m"(vmxon_pointer) - : : fault); + asm goto("1: vmxon %[vmxon_pointer]\n\t" + _ASM_EXTABLE(1b, %l[fault]) + : : [vmxon_pointer] "m"(vmxon_pointer) + : : fault); return 0; fault: diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index ec268df83ed67..5edab28dfb2ef 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -73,7 +73,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - asm_volatile_goto("1: vmread %[field], %[output]\n\t" + asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) @@ -166,7 +166,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) #define vmx_asm1(insn, op1, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ @@ -183,7 +183,7 @@ fault: \ #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index c812bf85021c0..46c8596259d2d 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "_nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, * make it reachable and wrap both into a no-transform block * to avoid any assembler interference with this. */ - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" ".begin no-transform\n\t" "_j %l[l_yes]\n\t" "2:\n\t" diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f55a37efdb974..e31243ea637c7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,25 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * + * Work it around via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236661f..a95ec8a3a7ca3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -284,8 +284,8 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 90e275bb3e5d7..a3a8ddca99189 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594d65..634e81d83efd9 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index 11ff975242cac..e2ff22b379a44 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 1bdd834bdd571..d09f9dc172a48 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,8 +36,8 @@ #include #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #endif /* __LINUX_COMPILER_TYPES_H */ -- GitLab From 3af7236d6dbffaeee7538b37e81de39dde4cd5dd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Feb 2024 11:14:33 -0800 Subject: [PATCH 0069/2290] update workarounds for gcc "asm goto" issue commit 68fb3ca0e408e00db1c3f8fccdfa19e274c033be upstream. In commit 4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") I did the gcc workaround unconditionally, because the cause of the bad code generation wasn't entirely clear. In the meantime, Jakub Jelinek debugged the issue, and has come up with a fix in gcc [2], which also got backported to the still maintained branches of gcc-11, gcc-12 and gcc-13. Note that while the fix technically wasn't in the original gcc-14 branch, Jakub says: "while it is true that no GCC 14 snapshots until today (or whenever the fix will be committed) have the fix, for GCC trunk it is up to the distros to use the latest snapshot if they use it at all and would allow better testing of the kernel code without the workaround, so that if there are other issues they won't be discovered years later. Most userland code doesn't actually use asm goto with outputs..." so we will consider gcc-14 to be fixed - if somebody is using gcc snapshots of the gcc-14 before the fix, they should upgrade. Note that while the bug goes back to gcc-11, in practice other gcc changes seem to have effectively hidden it since gcc-12.1 as per a bisect by Jakub. So even a gcc-14 snapshot without the fix likely doesn't show actual problems. Also, make the default 'asm_goto_output()' macro mark the asm as volatile by hand, because of an unrelated gcc issue [1] where it doesn't match the documented behavior ("asm goto is always volatile"). Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 [1] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 [2] Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Requested-by: Jakub Jelinek Cc: Uros Bizjak Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Andrew Pinski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 7 ++++--- include/linux/compiler_types.h | 9 ++++++++- init/Kconfig | 9 +++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e31243ea637c7..149a520515e1d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -69,10 +69,9 @@ /* * GCC 'asm goto' with outputs miscompiles certain code sequences: * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 * - * Work it around via the same compiler barrier quirk that we used + * Work around it via the same compiler barrier quirk that we used * to use for the old 'asm goto' workaround. * * Also, always mark such 'asm goto' statements as volatile: all @@ -82,8 +81,10 @@ * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND #define asm_goto_output(x...) \ do { asm volatile goto(x); asm (""); } while (0) +#endif #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a95ec8a3a7ca3..574b4121ebe3e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -284,8 +284,15 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ #ifndef asm_goto_output -#define asm_goto_output(x...) asm goto(x) +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/init/Kconfig b/init/Kconfig index 148704640252e..ffb927bf6034f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) +config GCC_ASM_GOTO_OUTPUT_WORKAROUND + bool + depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT + # Fixed in GCC 14, 13.3, 12.4 and 11.5 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) -- GitLab From 84b576ad44ea9c5149be6c288c46924490c94709 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:06 +0000 Subject: [PATCH 0070/2290] btrfs: add and use helper to check if block group is used commit 1693d5442c458ae8d5b0d58463b873cd879569ed upstream. Add a helper function to determine if a block group is being used and make use of it at btrfs_delete_unused_bgs(). This helper will also be used in future code changes. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 3 +-- fs/btrfs/block-group.h | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 08017b180a10d..efc6f03773eb3 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1375,8 +1375,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_lock(&block_group->lock); - if (block_group->reserved || block_group->pinned || - block_group->used || block_group->ro || + if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 47a2dcbfee255..bace40a006379 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -241,6 +241,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) return (block_group->start + block_group->length); } +static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg) +{ + lockdep_assert_held(&bg->lock); + + return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0); +} + static inline bool btrfs_is_block_group_data_only( struct btrfs_block_group *block_group) { -- GitLab From e717aecd2a430873edf63444543c768e42c6a91f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:14 +0000 Subject: [PATCH 0071/2290] btrfs: do not delete unused block group if it may be used soon commit f4a9f219411f318ae60d6ff7f129082a75686c6c upstream. Before deleting a block group that is in the list of unused block groups (fs_info->unused_bgs), we check if the block group became used before deleting it, as extents from it may have been allocated after it was added to the list. However even if the block group was not yet used, there may be tasks that have only reserved space and have not yet allocated extents, and they might be relying on the availability of the unused block group in order to allocate extents. The reservation works first by increasing the "bytes_may_use" field of the corresponding space_info object (which may first require flushing delayed items, allocating a new block group, etc), and only later a task does the actual allocation of extents. For metadata we usually don't end up using all reserved space, as we are pessimistic and typically account for the worst cases (need to COW every single node in a path of a tree at maximum possible height, etc). For data we usually reserve the exact amount of space we're going to allocate later, except when using compression where we always reserve space based on the uncompressed size, as compression is only triggered when writeback starts so we don't know in advance how much space we'll actually need, or if the data is compressible. So don't delete an unused block group if the total size of its space_info object minus the block group's size is less then the sum of used space and space that may be used (space_info->bytes_may_use), as that means we have tasks that reserved space and may need to allocate extents from the block group. In this case, besides skipping the deletion, re-add the block group to the list of unused block groups so that it may be reconsidered later, in case the tasks that reserved space end up not needing to allocate extents from it. Allowing the deletion of the block group while we have reserved space, can result in tasks failing to allocate metadata extents (-ENOSPC) while under a transaction handle, resulting in a transaction abort, or failure during writeback for the case of data extents. CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index efc6f03773eb3..9f77565bd7f5a 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1318,6 +1318,7 @@ out: */ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) { + LIST_HEAD(retry_list); struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; @@ -1339,6 +1340,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { + u64 used; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, @@ -1374,6 +1376,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) goto next; } + spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { @@ -1385,10 +1388,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) */ trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + up_write(&space_info->groups_sem); + goto next; + } + + /* + * The block group may be unused but there may be space reserved + * accounting with the existence of that block group, that is, + * space_info->bytes_may_use was incremented by a task but no + * space was yet allocated from the block group by the task. + * That space may or may not be allocated, as we are generally + * pessimistic about space reservation for metadata as well as + * for data when using compression (as we reserve space based on + * the worst case, when data can't be compressed, and before + * actually attempting compression, before starting writeback). + * + * So check if the total space of the space_info minus the size + * of this block group is less than the used space of the + * space_info - if that's the case, then it means we have tasks + * that might be relying on the block group in order to allocate + * extents, and add back the block group to the unused list when + * we finish, so that we retry later in case no tasks ended up + * needing to allocate extents from the block group. + */ + used = btrfs_space_info_used(space_info, true); + if (space_info->total_bytes - block_group->length < used) { + /* + * Add a reference for the list, compensate for the ref + * drop under the "next" label for the + * fs_info->unused_bgs list. + */ + btrfs_get_block_group(block_group); + list_add_tail(&block_group->bg_list, &retry_list); + + trace_btrfs_skip_unused_block_group(block_group); + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); /* We don't want to force the issue, only flip if it's ok. */ ret = inc_block_group_ro(block_group, 0); @@ -1512,12 +1554,16 @@ next: btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } + list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->unused_bgs); + spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); -- GitLab From a1a7b9589574792796efee9e8023087e2b4f8fa8 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 10 Jan 2024 17:51:26 -0800 Subject: [PATCH 0072/2290] btrfs: forbid creating subvol qgroups commit 0c309d66dacddf8ce939b891d9ead4a8e21ad6f0 upstream. Creating a qgroup 0/subvolid leads to various races and it isn't helpful, because you can't specify a subvol id when creating a subvol, so you can't be sure it will be the right one. Any requirements on the automatic subvol can be gratified by using a higher level qgroup and the inheritance parameters of subvol creation. Fixes: cecbb533b5fc ("btrfs: record simple quota deltas in delayed refs") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8516c70b5edc1..196e222749ccd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4695,6 +4695,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } + if (sa->create && is_fstree(sa->qgroupid)) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); -- GitLab From 66b317a2fc45b2ef66527ee3f8fa08fb5beab88d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 20 Jan 2024 19:41:28 +1030 Subject: [PATCH 0073/2290] btrfs: do not ASSERT() if the newly created subvolume already got read commit e03ee2fe873eb68c1f9ba5112fee70303ebf9dfb upstream. [BUG] There is a syzbot crash, triggered by the ASSERT() during subvolume creation: assertion failed: !anon_dev, in fs/btrfs/disk-io.c:1319 ------------[ cut here ]------------ kernel BUG at fs/btrfs/disk-io.c:1319! invalid opcode: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:btrfs_get_root_ref.part.0+0x9aa/0xa60 btrfs_get_new_fs_root+0xd3/0xf0 create_subvol+0xd02/0x1650 btrfs_mksubvol+0xe95/0x12b0 __btrfs_ioctl_snap_create+0x2f9/0x4f0 btrfs_ioctl_snap_create+0x16b/0x200 btrfs_ioctl+0x35f0/0x5cf0 __x64_sys_ioctl+0x19d/0x210 do_syscall_64+0x3f/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0x6b ---[ end trace 0000000000000000 ]--- [CAUSE] During create_subvol(), after inserting root item for the newly created subvolume, we would trigger btrfs_get_new_fs_root() to get the btrfs_root of that subvolume. The idea here is, we have preallocated an anonymous device number for the subvolume, thus we can assign it to the new subvolume. But there is really nothing preventing things like backref walk to read the new subvolume. If that happens before we call btrfs_get_new_fs_root(), the subvolume would be read out, with a new anonymous device number assigned already. In that case, we would trigger ASSERT(), as we really expect no one to read out that subvolume (which is not yet accessible from the fs). But things like backref walk is still possible to trigger the read on the subvolume. Thus our assumption on the ASSERT() is not correct in the first place. [FIX] Fix it by removing the ASSERT(), and just free the @anon_dev, reset it to 0, and continue. If the subvolume tree is read out by something else, it should have already get a new anon_dev assigned thus we only need to free the preallocated one. Reported-by: Chenyuan Yang Fixes: 2dfb1e43f57d ("btrfs: preallocate anon block device at first phase of snapshot creation") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 40152458e7b74..0d1b05ded1e35 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1662,8 +1662,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ + if (unlikely(anon_dev)) { + free_anon_bdev(anon_dev); + anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); -- GitLab From f98913c07cd87f54f7355fa3a0d762b7585c0ae3 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 10 Jan 2024 17:30:00 -0800 Subject: [PATCH 0074/2290] btrfs: forbid deleting live subvol qgroup commit a8df35619948bd8363d330c20a90c9a7fbff28c0 upstream. If a subvolume still exists, forbid deleting its qgroup 0/subvolid. This behavior generally leads to incorrect behavior in squotas and doesn't have a legitimate purpose. Fixes: cecbb533b5fc ("btrfs: record simple quota deltas in delayed refs") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 96ec9ccc2ef61..b3472bf6b288f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1635,6 +1635,15 @@ out: return ret; } +static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) +{ + return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || + qgroup->excl > 0 || qgroup->excl_cmpr > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); +} + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1654,6 +1663,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) goto out; } + if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { + ret = -EBUSY; + goto out; + } + /* Check if there are no children of this qgroup */ if (!list_empty(&qgroup->members)) { ret = -EBUSY; -- GitLab From dfd1f44e49585f0248b08a8e17201572909e001c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 10 Jan 2024 17:48:44 +0100 Subject: [PATCH 0075/2290] btrfs: send: return EOPNOTSUPP on unknown flags commit f884a9f9e59206a2d41f265e7e403f080d10b493 upstream. When some ioctl flags are checked we return EOPNOTSUPP, like for BTRFS_SCRUB_SUPPORTED_FLAGS, BTRFS_SUBVOL_CREATE_ARGS_MASK or fallocate modes. The EINVAL is supposed to be for a supported but invalid values or combination of options. Fix that when checking send flags so it's consistent with the rest. CC: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/linux-btrfs/CAL3q7H5rryOLzp3EKq8RTbjMHMHeaJubfpsVLF6H4qJnKCUR1w@mail.gmail.com/ Reviewed-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4a4d65b5e24f7..a75669972dc73 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7852,7 +7852,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { - ret = -EINVAL; + ret = -EOPNOTSUPP; goto out; } -- GitLab From 4d6b2e17b5504a41472deec3628b43285940b6da Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 31 Jan 2024 17:18:04 +0000 Subject: [PATCH 0076/2290] btrfs: don't reserve space for checksums when writing to nocow files commit feefe1f49d26bad9d8997096e3a200280fa7b1c5 upstream. Currently when doing a write to a file we always reserve metadata space for inserting data checksums. However we don't need to do it if we have a nodatacow file (-o nodatacow mount option or chattr +C) or if checksums are disabled (-o nodatasum mount option), as in that case we are only adding unnecessary pressure to metadata reservations. For example on x86_64, with the default node size of 16K, a 4K buffered write into a nodatacow file is reserving 655360 bytes of metadata space, as it's accounting for checksums. After this change, which stops reserving space for checksums if we have a nodatacow file or checksums are disabled, we only need to reserve 393216 bytes of metadata. CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delalloc-space.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index f2bc5563c0f92..63b7fa7067434 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -243,7 +243,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; u64 qgroup_rsv_size = 0; - u64 csum_leaves; unsigned outstanding_extents; lockdep_assert_held(&inode->lock); @@ -258,10 +257,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, outstanding_extents); reserve_size += btrfs_calc_metadata_size(fs_info, 1); } - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, - inode->csum_bytes); - reserve_size += btrfs_calc_insert_metadata_size(fs_info, - csum_leaves); + if (!(inode->flags & BTRFS_INODE_NODATASUM)) { + u64 csum_leaves; + + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); + reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); + } /* * For qgroup rsv, the calculation is very simple: * account one nodesize for each outstanding extent @@ -276,14 +277,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); } -static void calc_inode_reservations(struct btrfs_fs_info *fs_info, +static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 nr_extents = count_max_extents(fs_info, num_bytes); - u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + u64 csum_leaves; u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); + if (inode->flags & BTRFS_INODE_NODATASUM) + csum_leaves = 0; + else + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, nr_extents + csum_leaves); @@ -335,7 +342,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, * everything out and try again, which is bad. This way we just * over-reserve slightly, and clean up the mess when we are done. */ - calc_inode_reservations(fs_info, num_bytes, disk_num_bytes, + calc_inode_reservations(inode, num_bytes, disk_num_bytes, &meta_reserve, &qgroup_reserve); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, noflush); @@ -356,7 +363,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, spin_lock(&inode->lock); nr_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, nr_extents); - inode->csum_bytes += disk_num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); @@ -390,7 +398,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - inode->csum_bytes -= num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes -= num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); -- GitLab From 7ba7f9ed88a161091f2aa163370deb5ebc504524 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 2 Feb 2024 12:09:22 +0000 Subject: [PATCH 0077/2290] btrfs: reject encoded write if inode has nodatasum flag set commit 1bd96c92c6a0a4d43815eb685c15aa4b78879dc9 upstream. Currently we allow an encoded write against inodes that have the NODATASUM flag set, either because they are NOCOW files or they were created while the filesystem was mounted with "-o nodatasum". This results in having compressed extents without corresponding checksums, which is a filesystem inconsistency reported by 'btrfs check'. For example, running btrfs/281 with MOUNT_OPTIONS="-o nodatacow" triggers this and 'btrfs check' errors out with: [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots root 256 inode 257 errors 1040, bad file extent, some csum missing root 256 inode 258 errors 1040, bad file extent, some csum missing ERROR: errors found in fs roots (...) So reject encoded writes if the target inode has NODATASUM set. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82f92b5652a77..63d61fcd95e3b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10774,6 +10774,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE) return -EINVAL; + /* + * Compressed extents should always have checksums, so error out if we + * have a NOCOW file or inode was created while mounted with NODATASUM. + */ + if (inode->flags & BTRFS_INODE_NODATASUM) + return -EINVAL; + orig_count = iov_iter_count(from); /* The extent size must be sane. */ -- GitLab From 02f2b95b00bf57d20320ee168b30fb7f3db8e555 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 31 Jan 2024 14:27:25 -0500 Subject: [PATCH 0078/2290] btrfs: don't drop extent_map for free space inode on write error commit 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade upstream. While running the CI for an unrelated change I hit the following panic with generic/648 on btrfs_holes_spacecache. assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385 ------------[ cut here ]------------ kernel BUG at fs/btrfs/extent_io.c:1385! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1 RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0 Call Trace: extent_write_cache_pages+0x2ac/0x8f0 extent_writepages+0x87/0x110 do_writepages+0xd5/0x1f0 filemap_fdatawrite_wbc+0x63/0x90 __filemap_fdatawrite_range+0x5c/0x80 btrfs_fdatawrite_range+0x1f/0x50 btrfs_write_out_cache+0x507/0x560 btrfs_write_dirty_block_groups+0x32a/0x420 commit_cowonly_roots+0x21b/0x290 btrfs_commit_transaction+0x813/0x1360 btrfs_sync_file+0x51a/0x640 __x64_sys_fdatasync+0x52/0x90 do_syscall_64+0x9c/0x190 entry_SYSCALL_64_after_hwframe+0x6e/0x76 This happens because we fail to write out the free space cache in one instance, come back around and attempt to write it again. However on the second pass through we go to call btrfs_get_extent() on the inode to get the extent mapping. Because this is a new block group, and with the free space inode we always search the commit root to avoid deadlocking with the tree, we find nothing and return a EXTENT_MAP_HOLE for the requested range. This happens because the first time we try to write the space cache out we hit an error, and on an error we drop the extent mapping. This is normal for normal files, but the free space cache inode is special. We always expect the extent map to be correct. Thus the second time through we end up with a bogus extent map. Since we're deprecating this feature, the most straightforward way to fix this is to simply skip dropping the extent map range for this failed range. I shortened the test by using error injection to stress the area to make it easier to reproduce. With this patch in place we no longer panic with my error injection test. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 63d61fcd95e3b..f7f4bcc094642 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3364,8 +3364,23 @@ out: unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); - /* Drop extent maps for the part of the extent we didn't write. */ - btrfs_drop_extent_map_range(inode, unwritten_start, end, false); + /* + * Drop extent maps for the part of the extent we didn't write. + * + * We have an exception here for the free_space_inode, this is + * because when we do btrfs_get_extent() on the free space inode + * we will search the commit root. If this is a new block group + * we won't find anything, and we will trip over the assert in + * writepage where we do ASSERT(em->block_start != + * EXTENT_MAP_HOLE). + * + * Theoretically we could also skip this for any NOCOW extent as + * we don't mess with the extent map tree in the NOCOW case, but + * for now simply skip this if we are the free space inode. + */ + if (!btrfs_is_free_space_inode(inode)) + btrfs_drop_extent_map_range(inode, unwritten_start, + end, false); /* * If the ordered extent had an IOERR or something else went -- GitLab From 0f081fcfaac32897a78a7ddb376631d36f774e44 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:33 -0800 Subject: [PATCH 0079/2290] driver core: Fix device_link_flag_is_sync_state_only() commit 7fddac12c38237252431d5b8af7b6d5771b6d125 upstream. device_link_flag_is_sync_state_only() correctly returns true on the flags of an existing device link that only implements sync_state() functionality. However, it incorrectly and confusingly returns false if it's called with DL_FLAG_SYNC_STATE_ONLY. This bug doesn't manifest in any of the existing calls to this function, but fix this confusing behavior to avoid future bugs. Fixes: 67cad5c67019 ("driver core: fw_devlink: Add DL_FLAG_CYCLE support to device links") Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index af90bfb0cc3d8..191590055932f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -337,10 +337,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { - return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED); + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** -- GitLab From 0cf05064008c13c7ff9ffbf4e4735d2c14518a6f Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 29 Jan 2024 20:25:56 +0100 Subject: [PATCH 0080/2290] of: unittest: Fix compile in the non-dynamic case [ Upstream commit 607aad1e4356c210dbef9022955a3089377909b2 ] If CONFIG_OF_KOBJ is not set, a device_node does not contain a kobj and attempts to access the embedded kobj via kref_read break the compile. Replace affected kref_read calls with a macro that reads the refcount if it exists and returns 1 if there is no embedded kobj. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401291740.VP219WIz-lkp@intel.com/ Fixes: 4dde83569832 ("of: Fix double free in of_parse_phandle_with_args_map") Signed-off-by: Christian A. Ehrhardt Link: https://lore.kernel.org/r/20240129192556.403271-1-lk@c--e.de Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index e541a8960f1de..ce1386074e66b 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -49,6 +49,12 @@ static struct unittest_results { failed; \ }) +#ifdef CONFIG_OF_KOBJ +#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref) +#else +#define OF_KREF_READ(NODE) 1 +#endif + /* * Expected message may have a message level other than KERN_INFO. * Print the expected message only if the current loglevel will allow @@ -562,7 +568,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void) pr_err("missing testcase data\n"); return; } - prefs[i] = kref_read(&p[i]->kobj.kref); + prefs[i] = OF_KREF_READ(p[i]); } rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells"); @@ -685,9 +691,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void) unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); for (i = 0; i < ARRAY_SIZE(p); ++i) { - unittest(prefs[i] == kref_read(&p[i]->kobj.kref), + unittest(prefs[i] == OF_KREF_READ(p[i]), "provider%d: expected:%d got:%d\n", - i, prefs[i], kref_read(&p[i]->kobj.kref)); + i, prefs[i], OF_KREF_READ(p[i])); of_node_put(p[i]); } } -- GitLab From b3557a3697aee39bf80b9645cfd2d0ec69ec4e7f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:13 +0800 Subject: [PATCH 0081/2290] KVM: selftests: Clear dirty ring states between two modes in dirty_log_test [ Upstream commit 7167190ddb863bd061c0c6b61f4cec94184b40da ] There are two states, which need to be cleared before next mode is executed. Otherwise, we will hit failure as the following messages indicate. - The variable 'dirty_ring_vcpu_ring_full' shared by main and vcpu thread. It's indicating if the vcpu exit due to full ring buffer. The value can be carried from previous mode (VM_MODE_P40V48_4K) to current one (VM_MODE_P40V48_64K) when VM_MODE_P40V48_16K isn't supported. - The current ring buffer index needs to be reset before next mode (VM_MODE_P40V48_64K) is executed. Otherwise, the stale value is carried from previous mode (VM_MODE_P40V48_4K). # ./dirty_log_test -M dirty-ring Setting log mode to: 'dirty-ring' Test iterations: 32, interval: 10 (ms) Testing guest mode: PA-bits:40, VA-bits:48, 4K pages guest physical test memory offset: 0xffbfffc000 : Dirtied 995328 pages Total bits checked: dirty (1012434), clear (7114123), track_next (966700) Testing guest mode: PA-bits:40, VA-bits:48, 64K pages guest physical test memory offset: 0xffbffc0000 vcpu stops because vcpu is kicked out... vcpu continues now. Notifying vcpu to continue Iteration 1 collected 0 pages vcpu stops because dirty ring is full... vcpu continues now. vcpu stops because dirty ring is full... vcpu continues now. vcpu stops because dirty ring is full... ==== Test Assertion Failure ==== dirty_log_test.c:369: cleared == count pid=10541 tid=10541 errno=22 - Invalid argument 1 0x0000000000403087: dirty_ring_collect_dirty_pages at dirty_log_test.c:369 2 0x0000000000402a0b: log_mode_collect_dirty_pages at dirty_log_test.c:492 3 (inlined by) run_test at dirty_log_test.c:795 4 (inlined by) run_test at dirty_log_test.c:705 5 0x0000000000403a37: for_each_guest_mode at guest_modes.c:100 6 0x0000000000401ccf: main at dirty_log_test.c:938 7 0x0000ffff9ecd279b: ?? ??:0 8 0x0000ffff9ecd286b: ?? ??:0 9 0x0000000000401def: _start at ??:? Reset dirty pages (0) mismatch with collected (35566) Fix the issues by clearing 'dirty_ring_vcpu_ring_full' and the ring buffer index before next new mode is to be executed. Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-7-gshan@redhat.com Stable-dep-of: ba58f873cdee ("KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test") Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/dirty_log_test.c | 27 ++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe15..8758c10ec8503 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -226,13 +226,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -329,10 +331,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,7 +350,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); @@ -406,7 +408,8 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); void (*before_vcpu_join) (void); @@ -471,13 +474,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -696,6 +700,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -771,6 +776,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +784,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why -- GitLab From e5ed6c9225378b7ab6b0e305dd84da7c87219a1b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Feb 2024 15:18:31 -0800 Subject: [PATCH 0082/2290] KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test [ Upstream commit ba58f873cdeec30b6da48e28dd5782c5a3e1371b ] When finishing the final iteration of dirty_log_test testcase, set host_quit _before_ the final "continue" so that the vCPU worker doesn't run an extra iteration, and delete the hack-a-fix of an extra "continue" from the dirty ring testcase. This fixes a bug where the extra post to sem_vcpu_cont may not be consumed, which results in failures in subsequent runs of the testcases. The bug likely was missed during development as x86 supports only a single "guest mode", i.e. there aren't any subsequent testcases after the dirty ring test, because for_each_guest_mode() only runs a single iteration. For the regular dirty log testcases, letting the vCPU run one extra iteration is a non-issue as the vCPU worker waits on sem_vcpu_cont if and only if the worker is explicitly told to stop (vcpu_sync_stop_requested). But for the dirty ring test, which needs to periodically stop the vCPU to reap the dirty ring, letting the vCPU resume the guest _after_ the last iteration means the vCPU will get stuck without an extra "continue". However, blindly firing off an post to sem_vcpu_cont isn't guaranteed to be consumed, e.g. if the vCPU worker sees host_quit==true before resuming the guest. This results in a dangling sem_vcpu_cont, which leads to subsequent iterations getting out of sync, as the vCPU worker will continue on before the main task is ready for it to resume the guest, leading to a variety of asserts, e.g. ==== Test Assertion Failure ==== dirty_log_test.c:384: dirty_ring_vcpu_ring_full pid=14854 tid=14854 errno=22 - Invalid argument 1 0x00000000004033eb: dirty_ring_collect_dirty_pages at dirty_log_test.c:384 2 0x0000000000402d27: log_mode_collect_dirty_pages at dirty_log_test.c:505 3 (inlined by) run_test at dirty_log_test.c:802 4 0x0000000000403dc7: for_each_guest_mode at guest_modes.c:100 5 0x0000000000401dff: main at dirty_log_test.c:941 (discriminator 3) 6 0x0000ffff9be173c7: ?? ??:0 7 0x0000ffff9be1749f: ?? ??:0 8 0x000000000040206f: _start at ??:? Didn't continue vcpu even without ring full Alternatively, the test could simply reset the semaphores before each testcase, but papering over hacks with more hacks usually ends in tears. Reported-by: Shaoqin Huang Fixes: 84292e565951 ("KVM: selftests: Add dirty ring buffer test") Reviewed-by: Peter Xu Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240202231831.354848-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/dirty_log_test.c | 50 +++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 8758c10ec8503..ec40a33c29fda 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -355,7 +355,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -394,12 +397,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -412,7 +409,6 @@ struct log_mode { uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -431,7 +427,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -492,14 +487,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -701,6 +688,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; unsigned long *bmap; uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -772,12 +760,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); while (iteration < p->iterations) { @@ -803,15 +801,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. + */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " -- GitLab From 72d4600a6eb8d93e1e0458ee3988ff58019ac0a2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:17:06 +0300 Subject: [PATCH 0083/2290] wifi: iwlwifi: Fix some error codes [ Upstream commit c6ebb5b67641994de8bc486b33457fe0b681d6fe ] This saves the error as PTR_ERR(wifi_pkg). The problem is that "wifi_pkg" is a valid pointer, not an error pointer. Set the error code to -EINVAL instead. Fixes: 2a8084147bff ("iwlwifi: acpi: support reading and storing WRDS revision 1 and 2") Signed-off-by: Dan Carpenter Link: https://msgid.link/9620bb77-2d7c-4d76-b255-ad824ebf8e35@moroto.mountain Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 585e8cd2d332d..bdb8464cd4321 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -576,7 +576,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -592,7 +592,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -608,7 +608,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -665,7 +665,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -681,7 +681,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -697,7 +697,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } -- GitLab From 4bd106ac1c38eb2eb8693f2ceb7d9517f56aadcf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:17:31 +0300 Subject: [PATCH 0084/2290] wifi: iwlwifi: uninitialized variable in iwl_acpi_get_ppag_table() [ Upstream commit 65c6ee90455053cfd3067c17aaa4a42b0c766543 ] This is an error path and Smatch complains that "tbl_rev" is uninitialized on this path. All the other functions follow this same patter where they set the error code and goto out_free so that's probably what was intended here as well. Fixes: e8e10a37c51c ("iwlwifi: acpi: move ppag code from mvm to fw/acpi") Signed-off-by: Dan Carpenter Link: https://msgid.link/09900c01-6540-4a32-9451-563da0029cb6@moroto.mountain Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index bdb8464cd4321..f5fcc547de391 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -1044,6 +1044,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) goto read_table; } + ret = PTR_ERR(wifi_pkg); + goto out_free; + read_table: fwrt->ppag_ver = tbl_rev; flags = &wifi_pkg->package.elements[1]; -- GitLab From 68614f1865a0dec2b1dbfdeafd575ed8a0378799 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:01 -0800 Subject: [PATCH 0085/2290] of: property: Improve finding the supplier of a remote-endpoint property [ Upstream commit 782bfd03c3ae2c0e6e01b661b8e18f1de50357be ] After commit 4a032827daa8 ("of: property: Simplify of_link_to_phandle()"), remote-endpoint properties created a fwnode link from the consumer device to the supplier endpoint. This is a tiny bit inefficient (not buggy) when trying to create device links or detecting cycles. So, improve this the same way we improved finding the consumer of a remote-endpoint property. Fixes: 4a032827daa8 ("of: property: Simplify of_link_to_phandle()") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-3-saravanak@google.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/property.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index b636777e6f7c8..e1946cc170309 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1261,7 +1261,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) -DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) @@ -1326,6 +1325,17 @@ static struct device_node *parse_interrupts(struct device_node *np, return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } +static struct device_node *parse_remote_endpoint(struct device_node *np, + const char *prop_name, + int index) +{ + /* Return NULL for index > 0 to signify end of remote-endpoints. */ + if (!index || strcmp(prop_name, "remote-endpoint")) + return NULL; + + return of_graph_get_remote_port_parent(np); +} + static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, -- GitLab From 65ded4eb220695909eee657758e824fc30f0b561 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 7 Feb 2024 08:24:15 -0500 Subject: [PATCH 0086/2290] net: openvswitch: limit the number of recursions from action sets [ Upstream commit 6e2f90d31fe09f2b852de25125ca875aabd81367 ] The ovs module allows for some actions to recursively contain an action list for complex scenarios, such as sampling, checking lengths, etc. When these actions are copied into the internal flow table, they are evaluated to validate that such actions make sense, and these calls happen recursively. The ovs-vswitchd userspace won't emit more than 16 recursion levels deep. However, the module has no such limit and will happily accept limits larger than 16 levels nested. Prevent this by tracking the number of recursions happening and manually limiting it to 16 levels nested. The initial implementation of the sample action would track this depth and prevent more than 3 levels of recursion, but this was removed to support the clone use case, rather than limited at the current userspace limit. Fixes: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases") Signed-off-by: Aaron Conole Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240207132416.1488485-2-aconole@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/flow_netlink.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ead5418c126e3..e3c85ceb1f0a5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -47,6 +47,7 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16 static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2543,13 +2544,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2600,7 +2603,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2615,7 +2619,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2658,7 +2663,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2672,7 +2678,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2692,7 +2699,8 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3061,7 +3069,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3109,7 +3117,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3122,7 +3131,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3150,12 +3160,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3350,7 +3364,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3421,7 +3435,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3435,7 +3449,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3445,7 +3460,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3485,7 +3501,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa); -- GitLab From b9357489c46c7a43999964628db8b47d3a1f8672 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 6 Feb 2024 13:30:54 +0100 Subject: [PATCH 0087/2290] lan966x: Fix crash when adding interface under a lag [ Upstream commit 15faa1f67ab405d47789d4702f587ec7df7ef03e ] There is a crash when adding one of the lan966x interfaces under a lag interface. The issue can be reproduced like this: ip link add name bond0 type bond miimon 100 mode balance-xor ip link set dev eth0 master bond0 The reason is because when adding a interface under the lag it would go through all the ports and try to figure out which other ports are under that lag interface. And the issue is that lan966x can have ports that are NULL pointer as they are not probed. So then iterating over these ports it would just crash as they are NULL pointers. The fix consists in actually checking for NULL pointers before accessing something from the ports. Like we do in other places. Fixes: cabc9d49333d ("net: lan966x: Add lag support for lan966x") Signed-off-by: Horatiu Vultur Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206123054.3052966-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan966x/lan966x_lag.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c index 41fa2523d91d3..5f2cd9a8cf8fb 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c @@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) { - struct net_device *bond = lan966x->ports[lag]->bond; + struct lan966x_port *port = lan966x->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!port || !port->bond || (visited & BIT(lag))) continue; + bond = port->bond; bond_mask = lan966x_lag_get_mask(lan966x, bond); for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) { struct lan966x_port *port = lan966x->ports[p]; + if (!port) + continue; + lan_wr(ANA_PGID_PGID_SET(bond_mask), lan966x, ANA_PGID(p)); if (port->lag_tx_active) -- GitLab From 5ad627faed136089e27bcd15e0c33760e575c8c3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Jun 2023 19:19:11 +0100 Subject: [PATCH 0088/2290] tls/sw: Use splice_eof() to flush [ Upstream commit df720d288dbb1793e82b6ccbfc670ec871e9def4 ] Allow splice to end a TLS record after prematurely ending a splice/sendfile due to getting an EOF condition (->splice_read() returned 0) after splice had called TLS with a sendmsg() with MSG_MORE set when the user didn't set MSG_MORE. Suggested-by: Linus Torvalds Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/ Signed-off-by: David Howells Reviewed-by: Jakub Kicinski cc: Chuck Lever cc: Boris Pismenny cc: John Fastabend cc: Jens Axboe cc: Matthew Wilcox Signed-off-by: Jakub Kicinski Stable-dep-of: aec7961916f3 ("tls: fix race between async notify and socket close") Signed-off-by: Sasha Levin --- net/tls/tls.h | 1 + net/tls/tls_main.c | 2 ++ net/tls/tls_sw.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/net/tls/tls.h b/net/tls/tls.h index 0672acab27731..4922668fefaa8 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -97,6 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +void tls_sw_splice_eof(struct socket *sock); int tls_sw_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tls_sw_sendpage(struct sock *sk, struct page *page, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 338a443fa47b2..80b42a3e78830 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -922,6 +922,7 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_BASE] = *base; ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; @@ -990,6 +991,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; + prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0323040d34bc6..fbe6aab5f5b26 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1158,6 +1158,80 @@ send_end: return copied > 0 ? copied : ret; } +/* + * Handle unexpected EOF during splice without SPLICE_F_MORE set. + */ +void tls_sw_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_rec *rec; + struct sk_msg *msg_pl; + ssize_t copied = 0; + bool retrying = false; + int ret = 0; + int pending; + + if (!ctx->open_rec) + return; + + mutex_lock(&tls_ctx->tx_lock); + lock_sock(sk); + +retry: + rec = ctx->open_rec; + if (!rec) + goto unlock; + + msg_pl = &rec->msg_plaintext; + + /* Check the BPF advisor and perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + &copied, 0); + switch (ret) { + case 0: + case -EAGAIN: + if (retrying) + goto unlock; + retrying = true; + goto retry; + case -EINPROGRESS: + break; + default: + goto unlock; + } + + /* Wait for pending encryptions to get completed */ + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + + /* There can be no concurrent accesses, since we have no pending + * encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); + + if (ctx->async_wait.err) + goto unlock; + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, 0); + } + +unlock: + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); +} + static int tls_sw_do_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { -- GitLab From d55eb0b495a9e4de1c61394087bb06f12d18b6bc Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:46 +0200 Subject: [PATCH 0089/2290] tls: extract context alloc/initialization out of tls_set_sw_offload [ Upstream commit 615580cbc99af0da2d1c7226fab43a3d5003eb97 ] Simplify tls_set_sw_offload a bit. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Stable-dep-of: aec7961916f3 ("tls: fix race between async notify and socket close") Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 86 ++++++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fbe6aab5f5b26..47ae429e50e30 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2587,6 +2587,48 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2608,48 +2650,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } -- GitLab From 2c6841c88201e13967583f0f8a9f9b54b9cde404 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:18 -0800 Subject: [PATCH 0090/2290] net: tls: factor out tls_*crypt_async_wait() [ Upstream commit c57ca512f3b68ddcd62bda9cc24a8f5584ab01b1 ] Factor out waiting for async encrypt and decrypt to finish. There are already multiple copies and a subsequent fix will need more. No functional changes. Note that crypto_wait_req() returns wait->err Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Stable-dep-of: aec7961916f3 ("tls: fix race between async notify and socket close") Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 96 +++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 47ae429e50e30..b146be099a3fc 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -229,6 +229,20 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) spin_unlock_bh(&ctx->decrypt_compl_lock); } +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->decrypt_compl_lock); + reinit_completion(&ctx->async_wait.completion); + pending = atomic_read(&ctx->decrypt_pending); + spin_unlock_bh(&ctx->decrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + + return ctx->async_wait.err; +} + static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -496,6 +510,28 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + + /* There can be no concurrent accesses, since we have no + * pending encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -953,7 +989,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT)) @@ -1122,24 +1157,12 @@ trim_sgl: if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1171,7 +1194,6 @@ void tls_sw_splice_eof(struct socket *sock) ssize_t copied = 0; bool retrying = false; int ret = 0; - int pending; if (!ctx->open_rec) return; @@ -1203,22 +1225,7 @@ retry: } /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no pending - * encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) + if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ @@ -2197,16 +2204,10 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2425,16 +2426,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); -- GitLab From 7a3ca06d04d589deec81f56229a9a9d62352ce01 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:19 -0800 Subject: [PATCH 0091/2290] tls: fix race between async notify and socket close [ Upstream commit aec7961916f3f9e88766e2688992da6980f11b8d ] The submitting thread (one which called recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete() so any code past that point risks touching already freed data. Try to avoid the locking and extra flags altogether. Have the main thread hold an extra reference, this way we can depend solely on the atomic ref counter for synchronization. Don't futz with reiniting the completion, either, we are now tightly controlling when completion fires. Reported-by: valis Fixes: 0cada33241d9 ("net/tls: fix race condition causing kernel panic") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tls.h | 5 ----- net/tls/tls_sw.c | 43 ++++++++++--------------------------------- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index c36bf4c50027e..899c863aba02c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -108,9 +108,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -147,8 +144,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b146be099a3fc..ee11932237c07 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -223,22 +223,15 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); } static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) { - int pending; - - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->decrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); return ctx->async_wait.err; } @@ -266,6 +259,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -455,7 +449,6 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct tls_rec *rec; bool ready = false; struct sock *sk; - int pending; rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -495,12 +488,8 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -512,22 +501,9 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) { - int pending; - - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + atomic_inc(&ctx->encrypt_pending); return ctx->async_wait.err; } @@ -578,6 +554,7 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); @@ -2594,7 +2571,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc } crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); INIT_LIST_HEAD(&sw_ctx_tx->tx_list); INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); sw_ctx_tx->tx_work.sk = sk; @@ -2615,7 +2592,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) } crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); init_waitqueue_head(&sw_ctx_rx->wq); skb_queue_head_init(&sw_ctx_rx->rx_list); skb_queue_head_init(&sw_ctx_rx->async_hold); -- GitLab From 20b4ed034872b4d024b26e2bc1092c3f80e5db96 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 6 Feb 2024 17:18:22 -0800 Subject: [PATCH 0092/2290] net: tls: fix use-after-free with partial reads and async decrypt [ Upstream commit 32b55c5ff9103b8508c1e04bfa5a08c64e7a925f ] tls_decrypt_sg doesn't take a reference on the pages from clear_skb, so the put_page() in tls_decrypt_done releases them, and we trigger a use-after-free in process_rx_list when we try to read from the partially-read skb. Fixes: fd31f3996af2 ("tls: rx: decrypt into a fresh skb") Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index ee11932237c07..d651c50746a8b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -62,6 +62,7 @@ struct tls_decrypt_ctx { u8 iv[MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -186,7 +187,6 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct aead_request *aead_req = crypto_get_completion_data(data); struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -212,7 +212,7 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -1653,6 +1653,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, -- GitLab From 727cdd2f3dca2dd0528e7b2b711cb8529b1bea20 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:24 -0800 Subject: [PATCH 0093/2290] net: tls: fix returned read length with async decrypt [ Upstream commit ac437a51ce662364062f704e321227f6728e6adc ] We double count async, non-zc rx data. The previous fix was lucky because if we fully zc async_copy_bytes is 0 so we add 0. Decrypted already has all the bytes we handled, in all cases. We don't have to adjust anything, delete the erroneous line. Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d651c50746a8b..09d258bb2df75 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2202,7 +2202,6 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - decrypted += max(err, 0); } copied += decrypted; -- GitLab From 251145e504370d1f9db0c8aa70a5b898d2f5ba56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 10 Feb 2024 17:40:08 +0100 Subject: [PATCH 0094/2290] spi: ppc4xx: Drop write-only variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b3aa619a8b4706f35cb62f780c14e68796b37f3f ] Since commit 24778be20f87 ("spi: convert drivers to use bits_per_word_mask") the bits_per_word variable is only written to. The check that was there before isn't needed any more as the spi core ensures that only 8 bit transfers are used, so the variable can go away together with all assignments to it. Fixes: 24778be20f87 ("spi: convert drivers to use bits_per_word_mask") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240210164006.208149-8-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-ppc4xx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index d65f047b6c823..1179a1115137f 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -166,10 +166,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) int scr; u8 cdm = 0; u32 speed; - u8 bits_per_word; /* Start with the generic configuration for this device. */ - bits_per_word = spi->bits_per_word; speed = spi->max_speed_hz; /* @@ -177,9 +175,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) * the transfer to overwrite the generic configuration with zeros. */ if (t) { - if (t->bits_per_word) - bits_per_word = t->bits_per_word; - if (t->speed_hz) speed = min(t->speed_hz, spi->max_speed_hz); } -- GitLab From 1f0d7792e9023e8658e901b7b76a555f6aa052ec Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 11 Feb 2024 12:58:34 +0300 Subject: [PATCH 0095/2290] ASoC: rt5645: Fix deadlock in rt5645_jack_detect_work() [ Upstream commit 6ef5d5b92f7117b324efaac72b3db27ae8bb3082 ] There is a path in rt5645_jack_detect_work(), where rt5645->jd_mutex is left locked forever. That may lead to deadlock when rt5645_jack_detect_work() is called for the second time. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: cdba4301adda ("ASoC: rt5650: add mutex to avoid the jack detection failure") Signed-off-by: Alexey Khoroshilov Link: https://lore.kernel.org/r/1707645514-21196-1-git-send-email-khoroshilov@ispras.ru Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5645.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index fd3dca08460ba..844d14d4c9a51 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3288,6 +3288,7 @@ static void rt5645_jack_detect_work(struct work_struct *work) report, SND_JACK_HEADPHONE); snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE); + mutex_unlock(&rt5645->jd_mutex); return; case 4: val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020; -- GitLab From 20f378f92971f4d48bba59518d02911d63f23820 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 9 Feb 2024 01:55:18 -0800 Subject: [PATCH 0096/2290] net: sysfs: Fix /sys/class/net/ path for statistics [ Upstream commit 5b3fbd61b9d1f4ed2db95aaf03f9adae0373784d ] The Documentation/ABI/testing/sysfs-class-net-statistics documentation is pointing to the wrong path for the interface. Documentation is pointing to /sys/class/, instead of /sys/class/net/. Fix it by adding the `net/` directory before the interface. Fixes: 6044f9700645 ("net: sysfs: document /sys/class/net/statistics/*") Signed-off-by: Breno Leitao Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ABI/testing/sysfs-class-net-statistics | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics index 55db27815361b..53e508c6936a5 100644 --- a/Documentation/ABI/testing/sysfs-class-net-statistics +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -1,4 +1,4 @@ -What: /sys/class//statistics/collisions +What: /sys/class/net//statistics/collisions Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -6,7 +6,7 @@ Description: Indicates the number of collisions seen by this network device. This value might not be relevant with all MAC layers. -What: /sys/class//statistics/multicast +What: /sys/class/net//statistics/multicast Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -14,7 +14,7 @@ Description: Indicates the number of multicast packets received by this network device. -What: /sys/class//statistics/rx_bytes +What: /sys/class/net//statistics/rx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -23,7 +23,7 @@ Description: See the network driver for the exact meaning of when this value is incremented. -What: /sys/class//statistics/rx_compressed +What: /sys/class/net//statistics/rx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: network device. This value might only be relevant for interfaces that support packet compression (e.g: PPP). -What: /sys/class//statistics/rx_crc_errors +What: /sys/class/net//statistics/rx_crc_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -41,7 +41,7 @@ Description: by this network device. Note that the specific meaning might depend on the MAC layer used by the interface. -What: /sys/class//statistics/rx_dropped +What: /sys/class/net//statistics/rx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -51,7 +51,7 @@ Description: packet processing. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_errors +What: /sys/class/net//statistics/rx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -59,7 +59,7 @@ Description: Indicates the number of receive errors on this network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_fifo_errors +What: /sys/class/net//statistics/rx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -68,7 +68,7 @@ Description: network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_frame_errors +What: /sys/class/net//statistics/rx_frame_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -78,7 +78,7 @@ Description: on the MAC layer protocol used. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_length_errors +What: /sys/class/net//statistics/rx_length_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -87,7 +87,7 @@ Description: error, oversized or undersized. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_missed_errors +What: /sys/class/net//statistics/rx_missed_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -96,7 +96,7 @@ Description: due to lack of capacity in the receive side. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_nohandler +What: /sys/class/net//statistics/rx_nohandler Date: February 2016 KernelVersion: 4.6 Contact: netdev@vger.kernel.org @@ -104,7 +104,7 @@ Description: Indicates the number of received packets that were dropped on an inactive device by the network core. -What: /sys/class//statistics/rx_over_errors +What: /sys/class/net//statistics/rx_over_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -114,7 +114,7 @@ Description: (e.g: larger than MTU). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_packets +What: /sys/class/net//statistics/rx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -122,7 +122,7 @@ Description: Indicates the total number of good packets received by this network device. -What: /sys/class//statistics/tx_aborted_errors +What: /sys/class/net//statistics/tx_aborted_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -132,7 +132,7 @@ Description: a medium collision). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_bytes +What: /sys/class/net//statistics/tx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -143,7 +143,7 @@ Description: transmitted packets or all packets that have been queued for transmission. -What: /sys/class//statistics/tx_carrier_errors +What: /sys/class/net//statistics/tx_carrier_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -152,7 +152,7 @@ Description: because of carrier errors (e.g: physical link down). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_compressed +What: /sys/class/net//statistics/tx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -161,7 +161,7 @@ Description: this might only be relevant for devices that support compression (e.g: PPP). -What: /sys/class//statistics/tx_dropped +What: /sys/class/net//statistics/tx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -170,7 +170,7 @@ Description: See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_errors +What: /sys/class/net//statistics/tx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -179,7 +179,7 @@ Description: a network device. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_fifo_errors +What: /sys/class/net//statistics/tx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -188,7 +188,7 @@ Description: FIFO error. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_heartbeat_errors +What: /sys/class/net//statistics/tx_heartbeat_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -197,7 +197,7 @@ Description: reported as heartbeat errors. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_packets +What: /sys/class/net//statistics/tx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -206,7 +206,7 @@ Description: device. See the driver for whether this reports the number of all attempted or successful transmissions. -What: /sys/class//statistics/tx_window_errors +What: /sys/class/net//statistics/tx_window_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org -- GitLab From 4b02c89327f7b58b453c339e2743706a871b87e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:22:17 +0100 Subject: [PATCH 0097/2290] nouveau/svm: fix kvcalloc() argument order [ Upstream commit 2c80a2b715df75881359d07dbaacff8ad411f40e ] The conversion to kvcalloc() mixed up the object size and count arguments, causing a warning: drivers/gpu/drm/nouveau/nouveau_svm.c: In function 'nouveau_svm_fault_buffer_ctor': drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: error: 'kvcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 1010 | buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); | ^ drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: note: earlier argument should specify number of elements, later size of each element The behavior is still correct aside from the warning, but fixing it avoids the warnings and can help the compiler track the individual objects better. Fixes: 71e4bbca070e ("nouveau/svm: Use kvcalloc() instead of kvzalloc()") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240212112230.1117284-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 31a5b81ee9fc4..be6674fb1af71 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -997,7 +997,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; -- GitLab From c638b4afc750a0c7087b51065e2461d81b02049f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 11 Feb 2024 08:08:37 -0800 Subject: [PATCH 0098/2290] MIPS: Add 'memory' clobber to csum_ipv6_magic() inline assembler [ Upstream commit d55347bfe4e66dce2e1e7501e5492f4af3e315f8 ] After 'lib: checksum: Use aligned accesses for ip_fast_csum and csum_ipv6_magic tests' was applied, the test_csum_ipv6_magic unit test started failing for all mips platforms, both little and bit endian. Oddly enough, adding debug code into test_csum_ipv6_magic() made the problem disappear. The gcc manual says: "The "memory" clobber tells the compiler that the assembly code performs memory reads or writes to items other than those listed in the input and output operands (for example, accessing the memory pointed to by one of the input parameters) " This is definitely the case for csum_ipv6_magic(). Indeed, adding the 'memory' clobber fixes the problem. Cc: Charlie Jenkins Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guenter Roeck Reviewed-by: Charlie Jenkins Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/checksum.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index 4044eaf989ac7..0921ddda11a4b 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " .set pop" : "=&r" (sum), "=&r" (tmp) : "r" (saddr), "r" (daddr), - "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)); + "0" (htonl(len)), "r" (htonl(proto)), "r" (sum) + : "memory"); return csum_fold(sum); } -- GitLab From 1c981792e4ccbc134b468797acdd7781959e6893 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 8 Feb 2024 10:03:33 -0800 Subject: [PATCH 0099/2290] i40e: Do not allow untrusted VF to remove administratively set MAC [ Upstream commit 73d9629e1c8c1982f13688c4d1019c3994647ccc ] Currently when PF administratively sets VF's MAC address and the VF is put down (VF tries to delete all MACs) then the MAC is removed from MAC filters and primary VF MAC is zeroed. Do not allow untrusted VF to remove primary MAC when it was set administratively by PF. Reproducer: 1) Create VF 2) Set VF interface up 3) Administratively set the VF's MAC 4) Put VF interface down [root@host ~]# echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs [root@host ~]# ip link set enp2s0f0v0 up [root@host ~]# ip link set enp2s0f0 vf 0 mac fe:6c:b5:da:c7:7d [root@host ~]# ip link show enp2s0f0 23: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 3c:ec:ef:b7:dd:04 brd ff:ff:ff:ff:ff:ff vf 0 link/ether fe:6c:b5:da:c7:7d brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off [root@host ~]# ip link set enp2s0f0v0 down [root@host ~]# ip link show enp2s0f0 23: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 3c:ec:ef:b7:dd:04 brd ff:ff:ff:ff:ff:ff vf 0 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off Fixes: 700bbf6c1f9e ("i40e: allow VF to remove any MAC filter") Fixes: ceb29474bbbc ("i40e: Add support for VF to specify its primary MAC address") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240208180335.1844996-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3d3db58090ed1..ed4be80fec2a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2846,6 +2846,24 @@ error_param: (u8 *)&stats, sizeof(stats)); } +/** + * i40e_can_vf_change_mac + * @vf: pointer to the VF info + * + * Return true if the VF is allowed to change its MAC filters, false otherwise + */ +static bool i40e_can_vf_change_mac(struct i40e_vf *vf) +{ + /* If the VF MAC address has been set administratively (via the + * ndo_set_vf_mac command), then deny permission to the VF to + * add/delete unicast MAC addresses, unless the VF is trusted + */ + if (vf->pf_set_mac && !vf->trusted) + return false; + + return true; +} + #define I40E_MAX_MACVLAN_PER_HW 3072 #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \ (num_ports)) @@ -2905,8 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - !is_multicast_ether_addr(addr) && vf->pf_set_mac && + if (!i40e_can_vf_change_mac(vf) && + !is_multicast_ether_addr(addr) && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); @@ -3049,19 +3067,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } - if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr)) - was_unimac_deleted = true; } vsi = pf->vsi[vf->lan_vsi_idx]; spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ - for (i = 0; i < al->num_elements; i++) + for (i = 0; i < al->num_elements; i++) { + const u8 *addr = al->list[i].addr; + + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ + if (ether_addr_equal(addr, vf->default_lan_addr.addr) && + i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } + } spin_unlock_bh(&vsi->mac_filter_hash_lock); -- GitLab From 7a245b8a2fa96b7cc202df4a7cf6303486e18790 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Nov 2023 17:01:03 +0100 Subject: [PATCH 0100/2290] i40e: Fix waiting for queues of all VSIs to be disabled [ Upstream commit c73729b64bb692186da080602cd13612783f52ac ] The function i40e_pf_wait_queues_disabled() iterates all PF's VSIs up to 'pf->hw.func_caps.num_vsis' but this is incorrect because the real number of VSIs can be up to 'pf->num_alloc_vsi' that can be higher. Fix this loop. Fixes: 69129dc39fac ("i40e: Modify Tx disable wait flow in case of DCB reconfiguration") Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Reviewed-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 63d43ef86f9b9..76455405a6d8e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5333,7 +5333,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; - for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { + for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) { ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) -- GitLab From 76a42074d0b8b342f4c1abb0553a64271c5d2bb2 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 22 Jan 2024 09:52:01 -0800 Subject: [PATCH 0101/2290] scs: add CONFIG_MMU dependency for vfree_atomic() commit 6f9dc684cae638dda0570154509884ee78d0f75c upstream. The shadow call stack implementation fails to build without CONFIG_MMU: ld.lld: error: undefined symbol: vfree_atomic >>> referenced by scs.c >>> kernel/scs.o:(scs_free) in archive vmlinux.a Link: https://lkml.kernel.org/r/20240122175204.2371009-1-samuel.holland@sifive.com Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Cc: Will Deacon Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index 14273a6203dfc..f99fd9a4ca778 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -642,6 +642,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from -- GitLab From 36be97e9eb535fe3008a5cb040b1e56f29f2e398 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 26 Jan 2024 09:42:58 +0900 Subject: [PATCH 0102/2290] tracing/trigger: Fix to return error if failed to alloc snapshot commit 0958b33ef5a04ed91f61cef4760ac412080c4e08 upstream. Fix register_snapshot_trigger() to return error code if it failed to allocate a snapshot instead of 0 (success). Unless that, it will register snapshot trigger without an error. Link: https://lore.kernel.org/linux-trace-kernel/170622977792.270660.2789298642759362200.stgit@devnote2 Fixes: 0bbe7f719985 ("tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation") Cc: stable@vger.kernel.org Cc: Vincent Donnefort Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 918730d749325..f941ce01ee351 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1455,8 +1455,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } -- GitLab From 16b1025eaa8fc223ab4273ece20d1c3a4211a95d Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 18 Jan 2024 10:19:53 -0800 Subject: [PATCH 0103/2290] mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again commit 9319b647902cbd5cc884ac08a8a6d54ce111fc78 upstream. (struct dirty_throttle_control *)->thresh is an unsigned long, but is passed as the u32 divisor argument to div_u64(). On architectures where unsigned long is 64 bytes, the argument will be implicitly truncated. Use div64_u64() instead of div_u64() so that the value used in the "is this a safe division" check is the same as the divisor. Also, remove redundant cast of the numerator to u64, as that should happen implicitly. This would be difficult to exploit in memcg domain, given the ratio-based arithmetic domain_drity_limits() uses, but is much easier in global writeback domain with a BDI_CAP_STRICTLIMIT-backing device, using e.g. vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32) Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()") Signed-off-by: Zach O'Keefe Cc: Maxim Patlasov Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index de5f69921b946..d3e9d12860b9f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1526,7 +1526,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need -- GitLab From 221da504a55b15c5b4eb7363b53d88d41163fe00 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 09:09:56 -0800 Subject: [PATCH 0104/2290] scsi: storvsc: Fix ring buffer size calculation commit f4469f3858352ad1197434557150b1f7086762a0 upstream. Current code uses the specified ring buffer size (either the default of 128 Kbytes or a module parameter specified value) to encompass the one page ring buffer header plus the actual ring itself. When the page size is 4K, carving off one page for the header isn't significant. But when the page size is 64K on ARM64, only half of the default 128 Kbytes is left for the actual ring. While this doesn't break anything, the smaller ring size could be a performance bottleneck. Fix this by applying the VMBUS_RING_SIZE macro to the specified ring buffer size. This macro adds a page for the header, and rounds up the size to a page boundary, using the page size for which the kernel is built. Use this new size for subsequent ring buffer calculations. For example, on ARM64 with 64K page size and the default ring size, this results in the actual ring being 128 Kbytes, which is intended. Cc: stable@vger.kernel.org # 5.15.x Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122170956.496436-1-mhklinux@outlook.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c2d981d5a2dd5..4fad9d85bd6f9 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -326,6 +326,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -683,8 +684,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1964,7 +1965,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2157,7 +2158,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2191,8 +2192,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); -- GitLab From 30884a44e0cedc3dfda8c22432f3ba4078ec2d94 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 31 Jan 2024 21:57:27 +0100 Subject: [PATCH 0105/2290] dm-crypt, dm-verity: disable tasklets commit 0a9bab391e336489169b95cb0d4553d921302189 upstream. Tasklets have an inherent problem with memory corruption. The function tasklet_action_common calls tasklet_trylock, then it calls the tasklet callback and then it calls tasklet_unlock. If the tasklet callback frees the structure that contains the tasklet or if it calls some code that may free it, tasklet_unlock will write into free memory. The commits 8e14f610159d and d9a02e016aaf try to fix it for dm-crypt, but it is not a sufficient fix and the data corruption can still happen [1]. There is no fix for dm-verity and dm-verity will write into free memory with every tasklet-processed bio. There will be atomic workqueues implemented in the kernel 6.9 [2]. They will have better interface and they will not suffer from the memory corruption problem. But we need something that stops the memory corruption now and that can be backported to the stable kernels. So, I'm proposing this commit that disables tasklets in both dm-crypt and dm-verity. This commit doesn't remove the tasklet support, because the tasklet code will be reused when atomic workqueues will be implemented. [1] https://lore.kernel.org/all/d390d7ee-f142-44d3-822a-87949e14608b@suse.de/T/ [2] https://lore.kernel.org/lkml/20240130091300.2968534-1-tj@kernel.org/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: 39d42fa96ba1b ("dm crypt: add flags to optionally bypass kcryptd workqueues") Fixes: 5721d4e5a9cdb ("dm verity: Add optional "try_verify_in_tasklet" feature") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 37 ++--------------------------------- drivers/md/dm-verity-target.c | 26 ++---------------------- drivers/md/dm-verity.h | 1 - 3 files changed, 4 insertions(+), 60 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ff515437d81e7..0e6068ee783e7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -72,10 +72,8 @@ struct dm_crypt_io { struct bio *base_bio; u8 *integrity_metadata; bool integrity_metadata_from_pool:1; - bool in_tasklet:1; struct work_struct work; - struct tasklet_struct tasklet; struct convert_context ctx; @@ -1729,7 +1727,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; - io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1738,12 +1735,6 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } -static void kcryptd_io_bio_endio(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - bio_endio(io->base_bio); -} - /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1767,20 +1758,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) base_bio->bi_status = error; - /* - * If we are running this function from our tasklet, - * we can't call bio_endio() here, because it will call - * clone_endio() from dm.c, which in turn will - * free the current struct dm_crypt_io structure with - * our tasklet. In this case we need to delay bio_endio() - * execution to after the tasklet is done and dequeued. - */ - if (io->in_tasklet) { - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); - return; - } - bio_endio(base_bio); } @@ -2213,11 +2190,6 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } -static void kcryptd_crypt_tasklet(unsigned long work) -{ - kcryptd_crypt((struct work_struct *)work); -} - static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2229,15 +2201,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_hardirq() || irqs_disabled()) { - io->in_tasklet = true; - tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); - tasklet_schedule(&io->tasklet); + if (!(in_hardirq() || irqs_disabled())) { + kcryptd_crypt(&io->work); return; } - - kcryptd_crypt(&io->work); - return; } INIT_WORK(&io->work, kcryptd_crypt); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 24df610a2c438..4669923f4cfb4 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -634,23 +634,6 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } -static void verity_tasklet(unsigned long data) -{ - struct dm_verity_io *io = (struct dm_verity_io *)data; - int err; - - io->in_tasklet = true; - err = verity_verify_io(io); - if (err == -EAGAIN || err == -ENOMEM) { - /* fallback to retrying with work-queue */ - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - return; - } - - verity_finish_io(io, errno_to_blk_status(err)); -} - static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; @@ -663,13 +646,8 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { - tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); - tasklet_schedule(&io->tasklet); - } else { - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - } + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); } /* diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f9d522c870e61..f3f6070084196 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -83,7 +83,6 @@ struct dm_verity_io { struct bvec_iter iter; struct work_struct work; - struct tasklet_struct tasklet; /* * Three variably-size fields follow this struct: -- GitLab From 607385d75a0bb229998528078b563572a56db81e Mon Sep 17 00:00:00 2001 From: Techno Mooney Date: Mon, 29 Jan 2024 15:11:47 +0700 Subject: [PATCH 0106/2290] ASoC: amd: yc: Add DMI quirk for MSI Bravo 15 C7VF commit c6dce23ec993f7da7790a9eadb36864ceb60e942 upstream. The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Techno Mooney Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218402 Cc: stable@vger.kernel.org Signed-off-by: Techno Mooney Signed-off-by: Bagas Sanjaya Link: https://msgid.link/r/20240129081148.1044891-1-bagasdotme@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 808d002826233..3b43595aa87a7 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -297,6 +297,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 15 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From f00e8d0fccf64c3646bae71a58764f4ee90ac0f5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Jan 2024 21:16:39 +0100 Subject: [PATCH 0107/2290] parisc: Prevent hung tasks when printing inventory on serial console commit c8708d758e715c3824a73bf0cda97292b52be44d upstream. Printing the inventory on a serial console can be quite slow and thus may trigger the hung task detector (CONFIG_DETECT_HUNG_TASK=y) and possibly reboot the machine. Adding a cond_resched() prevents this. Signed-off-by: Helge Deller Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/drivers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 8f12b9f318ae6..a582928739dd5 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1003,6 +1003,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); -- GitLab From b23c431e505207c7f201d57fc4e041a47a96224f Mon Sep 17 00:00:00 2001 From: David Senoner Date: Fri, 26 Jan 2024 16:56:26 +0100 Subject: [PATCH 0108/2290] ALSA: hda/realtek: Fix the external mic not being recognised for Acer Swift 1 SF114-32 commit efb56d84dd9c3de3c99fc396abb57c6d330038b5 upstream. If you connect an external headset/microphone to the 3.5mm jack on the Acer Swift 1 SF114-32 it does not recognize the microphone. This fixes that and gives the user the ability to choose between internal and headset mic. Signed-off-by: David Senoner Cc: Link: https://lore.kernel.org/r/20240126155626.2304465-1-seda18@rolmail.net Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 62f2137044923..ef060f2c25438 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9431,6 +9431,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), -- GitLab From ea09996b3711d9f7bb9597c7bdc95f28475d2f8b Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Sun, 28 Jan 2024 16:57:04 +0100 Subject: [PATCH 0109/2290] ALSA: hda/realtek: Enable Mute LED on HP Laptop 14-fq0xxx commit f0d78972f27dc1d1d51fbace2713ad3cdc60a877 upstream. This HP Laptop uses ALC236 codec with COEF 0x07 controlling the mute LED. Enable existing quirk for this device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20240128155704.2333812-1-l.guzenko@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ef060f2c25438..274f528eba0ba 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9618,6 +9618,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), -- GitLab From d7d7a0e3b6f5adc45f23667cbb919e99093a5b5c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jan 2024 18:09:01 +0100 Subject: [PATCH 0110/2290] HID: i2c-hid-of: fix NULL-deref on failed power up commit 00aab7dcb2267f2aef59447602f34501efe1a07f upstream. A while back the I2C HID implementation was split in an ACPI and OF part, but the new OF driver never initialises the client pointer which is dereferenced on power-up failures. Fixes: b33752c30023 ("HID: i2c-hid: Reorganize so ACPI and OF are separate modules") Cc: stable@vger.kernel.org # 5.12 Cc: Douglas Anderson Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index 97a27a803f58d..6feb812fce375 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -80,6 +80,7 @@ static int i2c_hid_of_probe(struct i2c_client *client, if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; -- GitLab From b71a906a72ddebb511891c76fb7d2b0cdcf85f79 Mon Sep 17 00:00:00 2001 From: Tatsunosuke Tobita Date: Thu, 1 Feb 2024 13:40:55 +0900 Subject: [PATCH 0111/2290] HID: wacom: generic: Avoid reporting a serial of '0' to userspace commit ab41a31dd5e2681803642b6d08590b61867840ec upstream. The xf86-input-wacom driver does not treat '0' as a valid serial number and will drop any input report which contains an MSC_SERIAL = 0 event. The kernel driver already takes care to avoid sending any MSC_SERIAL event if the value of serial[0] == 0 (which is the case for devices that don't actually report a serial number), but this is not quite sufficient. Only the lower 32 bits of the serial get reported to userspace, so if this portion of the serial is zero then there can still be problems. This commit allows the driver to report either the lower 32 bits if they are non-zero or the upper 32 bits otherwise. Signed-off-by: Jason Gerecke Signed-off-by: Tatsunosuke Tobita Fixes: f85c9dc678a5 ("HID: wacom: generic: Support tool ID and additional tool types") CC: stable@vger.kernel.org # v4.10 Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 165ed872fa4e7..53235b276bb24 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2571,7 +2571,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev, wacom_wac->hid_data.tipswitch); input_report_key(input, wacom_wac->tool[0], sense); if (wacom_wac->serial[0]) { - input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]); + /* + * xf86-input-wacom does not accept a serial number + * of '0'. Report the low 32 bits if possible, but + * if they are zero, report the upper ones instead. + */ + __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu; + __u32 serial_hi = wacom_wac->serial[0] >> 32; + input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi)); input_report_abs(input, ABS_MISC, sense ? id : 0); } -- GitLab From e5c6c8ef3e4d3a88407404daafbf2c6be62f2185 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 29 Jan 2024 14:35:45 -0800 Subject: [PATCH 0112/2290] HID: wacom: Do not register input devices until after hid_hw_start commit c1d6708bf0d3dd976460d435373cf5abf21ce258 upstream. If a input device is opened before hid_hw_start is called, events may not be received from the hardware. In the case of USB-backed devices, for example, the hid_hw_start function is responsible for filling in the URB which is submitted when the input device is opened. If a device is opened prematurely, polling will never start because the device will not have been in the correct state to send the URB. Because the wacom driver registers its input devices before calling hid_hw_start, there is a window of time where a device can be opened and end up in an inoperable state. Some ARM-based Chromebooks in particular reliably trigger this bug. This commit splits the wacom_register_inputs function into two pieces. One which is responsible for setting up the allocated inputs (and runs prior to hid_hw_start so that devices are ready for any input events they may end up receiving) and another which only registers the devices (and runs after hid_hw_start to ensure devices can be immediately opened without issue). Note that the functions to initialize the LEDs and remotes are also moved after hid_hw_start to maintain their own dependency chains. Fixes: 7704ac937345 ("HID: wacom: implement generic HID handling for pen generic devices") Cc: stable@vger.kernel.org # v3.18+ Suggested-by: Dmitry Torokhov Signed-off-by: Jason Gerecke Tested-by: Dmitry Torokhov Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_sys.c | 63 ++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index af163e8dfec07..12d4c28741d7e 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2080,7 +2080,7 @@ static int wacom_allocate_inputs(struct wacom *wacom) return 0; } -static int wacom_register_inputs(struct wacom *wacom) +static int wacom_setup_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); @@ -2099,10 +2099,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pen_input_dev); wacom_wac->pen_input = NULL; pen_input_dev = NULL; - } else { - error = input_register_device(pen_input_dev); - if (error) - goto fail; } error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac); @@ -2111,10 +2107,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(touch_input_dev); wacom_wac->touch_input = NULL; touch_input_dev = NULL; - } else { - error = input_register_device(touch_input_dev); - if (error) - goto fail; } error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); @@ -2123,7 +2115,34 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; - } else { + } + + return 0; +} + +static int wacom_register_inputs(struct wacom *wacom) +{ + struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; + struct wacom_wac *wacom_wac = &(wacom->wacom_wac); + int error = 0; + + pen_input_dev = wacom_wac->pen_input; + touch_input_dev = wacom_wac->touch_input; + pad_input_dev = wacom_wac->pad_input; + + if (pen_input_dev) { + error = input_register_device(pen_input_dev); + if (error) + goto fail; + } + + if (touch_input_dev) { + error = input_register_device(touch_input_dev); + if (error) + goto fail; + } + + if (pad_input_dev) { error = input_register_device(pad_input_dev); if (error) goto fail; @@ -2379,6 +2398,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } + error = wacom_setup_inputs(wacom); + if (error) + goto fail; + + if (features->type == HID_GENERIC) + connect_mask |= HID_CONNECT_DRIVER; + + /* Regular HID work starts now */ + error = hid_hw_start(hdev, connect_mask); + if (error) { + hid_err(hdev, "hw start failed\n"); + goto fail; + } + error = wacom_register_inputs(wacom); if (error) goto fail; @@ -2393,16 +2426,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } - if (features->type == HID_GENERIC) - connect_mask |= HID_CONNECT_DRIVER; - - /* Regular HID work starts now */ - error = hid_hw_start(hdev, connect_mask); - if (error) { - hid_err(hdev, "hw start failed\n"); - goto fail; - } - if (!wireless) { /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(wacom); -- GitLab From 711beb8acf5d007302d167c4aa63cb5ff517cea9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 4 Feb 2024 04:56:17 -0800 Subject: [PATCH 0113/2290] iio: hid-sensor-als: Return 0 for HID_USAGE_SENSOR_TIME_TIMESTAMP commit 621c6257128149e45b36ffb973a01c3f3461b893 upstream. When als_capture_sample() is called with usage ID HID_USAGE_SENSOR_TIME_TIMESTAMP, return 0. The HID sensor core ignores the return value for capture_sample() callback, so return value doesn't make difference. But correct the return value to return success instead of -EINVAL. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240204125617.2635574-1-srinivas.pandruvada@linux.intel.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/hid-sensor-als.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 5a1a625d8d16d..85097b769c209 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -228,6 +228,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; -- GitLab From 3fd6c16ec78da4349de068529e2a175723dbdd0a Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:21 +0100 Subject: [PATCH 0114/2290] usb: ucsi: Add missing ppm_lock commit c9aed03a0a683fd1600ea92f2ad32232d4736272 upstream. Calling ->sync_write must be done while holding the PPM lock as the mailbox logic does not support concurrent commands. At least since the addition of partner task this means that ucsi_acknowledge_connector_change should be called with the PPM lock held as it calls ->sync_write. Thus protect the only call to ucsi_acknowledge_connector_change with the PPM. All other calls to ->sync_write already happen under the PPM lock. Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-2-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dc2dea3768fb6..0695ee54ff781 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -831,7 +831,9 @@ static void ucsi_handle_connector_change(struct work_struct *work) clear_bit(EVENT_PENDING, &con->ucsi->flags); + mutex_lock(&ucsi->ppm_lock); ret = ucsi_acknowledge_connector_change(ucsi); + mutex_unlock(&ucsi->ppm_lock); if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); -- GitLab From d31b886ed6a5095214062ee4fb55037eb930adb6 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 26 Jan 2024 17:38:00 -0500 Subject: [PATCH 0115/2290] usb: ulpi: Fix debugfs directory leak commit 3caf2b2ad7334ef35f55b95f3e1b138c6f77b368 upstream. The ULPI per-device debugfs root is named after the ulpi device's parent, but ulpi_unregister_interface tries to remove a debugfs directory named after the ulpi device itself. This results in the directory sticking around and preventing subsequent (deferred) probes from succeeding. Change the directory name to match the ulpi device. Fixes: bd0a0a024f2a ("usb: ulpi: Add debugfs support") Cc: stable@vger.kernel.org Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240126223800.2864613-1-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 38703781ee2d1..1283c427cdf88 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", -- GitLab From 9f754d009483791c14eef98095369a3a4ea66cac Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:22 +0100 Subject: [PATCH 0116/2290] usb: ucsi_acpi: Fix command completion handling commit 2840143e393a4ddc1caab4372969ea337371168c upstream. In case of a spurious or otherwise delayed notification it is possible that CCI still reports the previous completion. The UCSI spec is aware of this and provides two completion bits in CCI, one for normal commands and one for acks. As acks and commands alternate the notification handler can determine if the completion bit is from the current command. The initial UCSI code correctly handled this but the distinction between the two completion bits was lost with the introduction of the new API. To fix this revive the ACK_PENDING bit for ucsi_acpi and only complete commands if the completion bit matches. Fixes: f56de278e8ec ("usb: typec: ucsi: acpi: Move to the new API") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-3-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 217355f1f9b94..26171c5d3c61c 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -73,9 +73,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &ua->flags); + if (ack) + set_bit(ACK_PENDING, &ua->flags); + else + set_bit(COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -85,7 +89,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, ret = -ETIMEDOUT; out_clear_bit: - clear_bit(COMMAND_PENDING, &ua->flags); + if (ack) + clear_bit(ACK_PENDING, &ua->flags); + else + clear_bit(COMMAND_PENDING, &ua->flags); return ret; } @@ -142,8 +149,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &ua->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) + complete(&ua->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && + test_bit(COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } -- GitLab From 2888258144e08d1f7905c0ee5df8c14d26d23699 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 Jan 2024 16:35:32 +0100 Subject: [PATCH 0117/2290] USB: hub: check for alternate port before enabling A_ALT_HNP_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f17c34ffc792bbb520e4b61baa16b6cfc7d44b13 upstream. The OTG 1.3 spec has the feature A_ALT_HNP_SUPPORT, which tells a device that it is connected to the wrong port. Some devices refuse to operate if you enable that feature, because it indicates to them that they ought to request to be connected to another port. According to the spec this feature may be used based only the following three conditions: 6.5.3 a_alt_hnp_support Setting this feature indicates to the B-device that it is connected to an A-device port that is not capable of HNP, but that the A-device does have an alternate port that is capable of HNP. The A-device is required to set this feature under the following conditions: • the A-device has multiple receptacles • the A-device port that connects to the B-device does not support HNP • the A-device has another port that does support HNP A check for the third and first condition is missing. Add it. Signed-off-by: Oliver Neukum Cc: stable Fixes: 7d2d641c44269 ("usb: otg: don't set a_alt_hnp_support feature for OTG 2.0 device") Link: https://lore.kernel.org/r/20240122153545.12284-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4f181110d00db..d960a56b760ec 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2389,17 +2389,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev) } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { - /* Set a_alt_hnp_support for legacy otg device */ - err = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_SET_FEATURE, 0, - USB_DEVICE_A_ALT_HNP_SUPPORT, - 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (err < 0) - dev_err(&udev->dev, - "set a_alt_hnp_support failed: %d\n", - err); + /* + * We are operating on a legacy OTP device + * These should be told that they are operating + * on the wrong port if we have another port that does + * support HNP + */ + if (bus->otg_port != 0) { + /* Set a_alt_hnp_support for legacy otg device */ + err = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_A_ALT_HNP_SUPPORT, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (err < 0) + dev_err(&udev->dev, + "set a_alt_hnp_support failed: %d\n", + err); + } } } #endif -- GitLab From 0ecc97c81e3aea3335ea4a0b0f1777aae42b71ec Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Tue, 23 Jan 2024 11:48:29 +0800 Subject: [PATCH 0118/2290] usb: f_mass_storage: forbid async queue when shutdown happen commit b2d2d7ea0dd09802cf5a0545bf54d8ad8987d20c upstream. When write UDC to empty and unbind gadget driver from gadget device, it is possible that there are many queue failures for mass storage function. The root cause is mass storage main thread alaways try to queue request to receive a command from host if running flag is on, on platform like dwc3, if pull down called, it will not queue request again and return -ESHUTDOWN, but it not affect running flag of mass storage function. Check return code from mass storage function and clear running flag if it is -ESHUTDOWN, also indicate start in/out transfer failure to break loops. Cc: stable Signed-off-by: yuan linyu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240123034829.3848409-1-yuanlinyu@hihonor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 7b9a4cf9b100c..d35f30a9cae2c 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -544,21 +544,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_SENDING; - if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq); + if (rc) { bh->state = BUF_STATE_EMPTY; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_RECEIVING; - if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq); + if (rc) { bh->state = BUF_STATE_FULL; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } -- GitLab From 57e2e42ccd3cd6183228269715ed032f44536751 Mon Sep 17 00:00:00 2001 From: Uttkarsh Aggarwal Date: Fri, 19 Jan 2024 15:18:25 +0530 Subject: [PATCH 0119/2290] usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend commit 61a348857e869432e6a920ad8ea9132e8d44c316 upstream. In current scenario if Plug-out and Plug-In performed continuously there could be a chance while checking for dwc->gadget_driver in dwc3_gadget_suspend, a NULL pointer dereference may occur. Call Stack: CPU1: CPU2: gadget_unbind_driver dwc3_suspend_common dwc3_gadget_stop dwc3_gadget_suspend dwc3_disconnect_gadget CPU1 basically clears the variable and CPU2 checks the variable. Consider CPU1 is running and right before gadget_driver is cleared and in parallel CPU2 executes dwc3_gadget_suspend where it finds dwc->gadget_driver which is not NULL and resumes execution and then CPU1 completes execution. CPU2 executes dwc3_disconnect_gadget where it checks dwc->gadget_driver is already NULL because of which the NULL pointer deference occur. Cc: stable@vger.kernel.org Fixes: 9772b47a4c29 ("usb: dwc3: gadget: Fix suspend/resume during device mode") Acked-by: Thinh Nguyen Signed-off-by: Uttkarsh Aggarwal Link: https://lore.kernel.org/r/20240119094825.26530-1-quic_uaggarwa@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c4703f6b20894..576c21bf77cda 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4583,15 +4583,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) unsigned long flags; int ret; - if (!dwc->gadget_driver) - return 0; - ret = dwc3_gadget_soft_disconnect(dwc); if (ret) goto err; spin_lock_irqsave(&dwc->lock, flags); - dwc3_disconnect_gadget(dwc); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; -- GitLab From 6616d3c4f8284a7b3ef978c916566bd240cea1c7 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 10 Jan 2024 15:16:26 +0200 Subject: [PATCH 0120/2290] interconnect: qcom: sc8180x: Mark CO0 BCM keepalive [ Upstream commit 85e985a4f46e462a37f1875cb74ed380e7c0c2e0 ] The CO0 BCM needs to be up at all times, otherwise some hardware (like the UFS controller) loses its connection to the rest of the SoC, resulting in a hang of the platform, accompanied by a spectacular logspam. Mark it as keepalive to prevent such cases. Fixes: 9c8c6bac1ae8 ("interconnect: qcom: Add SC8180x providers") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231214-topic-sc8180_fixes-v1-1-421904863006@linaro.org Signed-off-by: Georgi Djakov Signed-off-by: Sasha Levin --- drivers/interconnect/qcom/sc8180x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 83461e31774ec..d9ee193fb18bd 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1387,6 +1387,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .keepalive = true, .num_nodes = 1, .nodes = { &slv_qns_cdsp_mem_noc } }; -- GitLab From be76ad74a43f90f340f9f479e6b04f02125f6aef Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 09:14:19 +0100 Subject: [PATCH 0121/2290] media: ir_toy: fix a memleak in irtoy_tx [ Upstream commit dc9ceb90c4b42c6e5c6757df1d6257110433788e ] When irtoy_command fails, buf should be freed since it is allocated by irtoy_tx, or there is a memleak. Fixes: 4114978dcd24 ("media: ir_toy: prevent device from hanging during transmit") Signed-off-by: Zhipeng Lu Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/ir_toy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 1968067092594..69e630d85262f 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); + kfree(buf); return err; } @@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); + kfree(buf); return err; } -- GitLab From 2aaa9239c981d849129bd2f18dc98a1f6faa23c3 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:34 -0800 Subject: [PATCH 0122/2290] driver core: fw_devlink: Improve detection of overlapping cycles [ Upstream commit 6442d79d880cf7a2fff18779265d657fef0cce4c ] fw_devlink can detect most overlapping/intersecting cycles. However it was missing a few corner cases because of an incorrect optimization logic that tries to avoid repeating cycle detection for devices that are already marked as part of a cycle. Here's an example provided by Xu Yang (edited for clarity): usb +-----+ tcpc | | +-----+ | +--| | |----------->|EP| |--+ | | +--| |EP|<-----------| | |--+ | | B | | | +-----+ | A | | +-----+ | ^ +-----+ | | | | | +-----| C |<--+ | | +-----+ usb-phy Node A (tcpc) will be populated as device 1-0050. Node B (usb) will be populated as device 38100000.usb. Node C (usb-phy) will be populated as device 381f0040.usb-phy. The description below uses the notation: consumer --> supplier child ==> parent 1. Node C is populated as device C. No cycles detected because cycle detection is only run when a fwnode link is converted to a device link. 2. Node B is populated as device B. As we convert B --> C into a device link we run cycle detection and find and mark the device link/fwnode link cycle: C--> A --> B.EP ==> B --> C 3. Node A is populated as device A. As we convert C --> A into a device link, we see it's already part of a cycle (from step 2) and don't run cycle detection. Thus we miss detecting the cycle: A --> B.EP ==> B --> A.EP ==> A Looking at it another way, A depends on B in one way: A --> B.EP ==> B But B depends on A in two ways and we only detect the first: B --> C --> A B --> A.EP ==> A To detect both of these, we remove the incorrect optimization attempt in step 3 and run cycle detection even if the fwnode link from which the device link is being created has already been marked as part of a cycle. Reported-by: Xu Yang Closes: https://lore.kernel.org/lkml/DU2PR04MB8822693748725F85DC0CB86C8C792@DU2PR04MB8822.eurprd04.prod.outlook.com/ Fixes: 3fb16866b51d ("driver core: fw_devlink: Make cycle detection more robust") Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 191590055932f..3078f44dc1861 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2056,9 +2056,14 @@ static int fw_devlink_create_devlink(struct device *con, /* * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So cycle detection isn't necessary and shouldn't be done. + * So, one might expect that cycle detection isn't necessary for them. + * However, if the device link was marked as SYNC_STATE_ONLY because + * it's part of a cycle, then we still need to do cycle detection. This + * is because the consumer and supplier might be part of multiple cycles + * and we need to detect all those cycles. */ - if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) { + if (!device_link_flag_is_sync_state_only(flags) || + flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); -- GitLab From 0c09912dd8387e228afcc5e34ac5d79b1e3a1058 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Tue, 23 Jan 2024 09:45:59 +0800 Subject: [PATCH 0123/2290] powerpc/kasan: Fix addr error caused by page alignment [ Upstream commit 4a7aee96200ad281a5cc4cf5c7a2e2a49d2b97b0 ] In kasan_init_region, when k_start is not page aligned, at the begin of for loop, k_cur = k_start & PAGE_MASK is less than k_start, and then `va = block + k_cur - k_start` is less than block, the addr va is invalid, because the memory address space from va to block is not alloced by memblock_alloc, which will not be reserved by memblock_reserve later, it will be used by other places. As a result, memory overwriting occurs. for example: int __init __weak kasan_init_region(void *start, size_t size) { [...] /* if say block(dcd97000) k_start(feef7400) k_end(feeff3fe) */ block = memblock_alloc(k_end - k_start, PAGE_SIZE); [...] for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { /* at the begin of for loop * block(dcd97000) va(dcd96c00) k_cur(feef7000) k_start(feef7400) * va(dcd96c00) is less than block(dcd97000), va is invalid */ void *va = block + k_cur - k_start; [...] } [...] } Therefore, page alignment is performed on k_start before memblock_alloc() to ensure the validity of the VA address. Fixes: 663c0c9496a6 ("powerpc/kasan: Fix shadow area set up for modules.") Signed-off-by: Jiangfeng Xiao Signed-off-by: Michael Ellerman Link: https://msgid.link/1705974359-43790-1-git-send-email-xiaojiangfeng@huawei.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/kasan/init_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index a70828a6d9357..aa9aa11927b2f 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size) if (ret) return ret; + k_start = k_start & PAGE_MASK; block = memblock_alloc(k_end - k_start, PAGE_SIZE); if (!block) return -ENOMEM; -- GitLab From 7190353835b4a219abb70f90b06cdcae97f11512 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:18:46 +0300 Subject: [PATCH 0124/2290] cifs: fix underflow in parse_server_interfaces() [ Upstream commit cffe487026be13eaf37ea28b783d9638ab147204 ] In this loop, we step through the buffer and after each item we check if the size_left is greater than the minimum size we need. However, the problem is that "bytes_left" is type ssize_t while sizeof() is type size_t. That means that because of type promotion, the comparison is done as an unsigned and if we have negative bytes left the loop continues instead of ending. Fixes: fe856be475f7 ("CIFS: parse and store info on iface queries") Signed-off-by: Dan Carpenter Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 5a157000bdfe6..34d1262004dfb 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -613,7 +613,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - while (bytes_left >= sizeof(*p)) { + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); tmp_iface.speed = le64_to_cpu(p->LinkSpeed); tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; -- GitLab From 083870b029c06da6a9a49340dd78637eec35a1d4 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Mon, 12 Feb 2024 18:22:39 +0530 Subject: [PATCH 0125/2290] i2c: qcom-geni: Correct I2C TRE sequence [ Upstream commit 83ef106fa732aea8558253641cd98e8a895604d7 ] For i2c read operation in GSI mode, we are getting timeout due to malformed TRE basically incorrect TRE sequence in gpi(drivers/dma/qcom/gpi.c) driver. I2C driver has geni_i2c_gpi(I2C_WRITE) function which generates GO TRE and geni_i2c_gpi(I2C_READ)generates DMA TRE. Hence to generate GO TRE before DMA TRE, we should move geni_i2c_gpi(I2C_WRITE) before geni_i2c_gpi(I2C_READ) inside the I2C GSI mode transfer function i.e. geni_i2c_gpi_xfer(). TRE stands for Transfer Ring Element - which is basically an element with size of 4 words. It contains all information like slave address, clk divider, dma address value data size etc). Mainly we have 3 TREs(Config, GO and DMA tre). - CONFIG TRE : consists of internal register configuration which is required before start of the transfer. - DMA TRE : contains DDR/Memory address, called as DMA descriptor. - GO TRE : contains Transfer directions, slave ID, Delay flags, Length of the transfer. I2c driver calls GPI driver API to config each TRE depending on the protocol. For read operation tre sequence will be as below which is not aligned to hardware programming guide. - CONFIG tre - DMA tre - GO tre As per Qualcomm's internal Hardware Programming Guide, we should configure TREs in below sequence for any RX only transfer. - CONFIG tre - GO tre - DMA tre Fixes: d8703554f4de ("i2c: qcom-geni: Add support for GPI DMA") Reviewed-by: Andi Shyti Reviewed-by: Bryan O'Donoghue Tested-by: Bryan O'Donoghue # qrb5165-rb5 Co-developed-by: Mukesh Kumar Savaliya Signed-off-by: Mukesh Kumar Savaliya Signed-off-by: Viken Dadhaniya Reviewed-by: Dmitry Baryshkov Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-qcom-geni.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 8fce98bb77ff9..75b9c3f26bba6 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -605,20 +605,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.addr = msgs[i].addr; + ret = geni_i2c_gpi(gi2c, &msgs[i], &config, + &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); + if (ret) + goto err; + if (msgs[i].flags & I2C_M_RD) { ret = geni_i2c_gpi(gi2c, &msgs[i], &config, &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); if (ret) goto err; - } - - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, - &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); - if (ret) - goto err; - if (msgs[i].flags & I2C_M_RD) dma_async_issue_pending(gi2c->rx_c); + } + dma_async_issue_pending(gi2c->tx_c); timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); -- GitLab From c12920ff9b570e8aefb3f02a41edd33257d30802 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 30 Jan 2024 16:27:20 +0800 Subject: [PATCH 0126/2290] irqchip/loongson-eiointc: Use correct struct type in eiointc_domain_alloc() [ Upstream commit f1c2765c6afcd1f71f76ed8c9bf94acedab4cecb ] eiointc_domain_alloc() uses struct eiointc, which is not defined, for a pointer. Older compilers treat that as a forward declaration and due to assignment of a void pointer there is no warning emitted. As the variable is then handed in as a void pointer argument to irq_domain_set_info() the code is functional. Use struct eiointc_priv instead. [ tglx: Rewrote changelog ] Fixes: dd281e1a1a93 ("irqchip: Add Loongson Extended I/O interrupt controller support") Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Acked-by: Huacai Chen Link: https://lore.kernel.org/r/20240130082722.2912576-2-maobibo@loongson.cn Signed-off-by: Sasha Levin --- drivers/irqchip/irq-loongson-eiointc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 3d99b8bdd8ef1..de115ee6e9ec7 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -242,7 +242,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, int ret; unsigned int i, type; unsigned long hwirq = 0; - struct eiointc *priv = domain->host_data; + struct eiointc_priv *priv = domain->host_data; ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); if (ret) -- GitLab From 4297217bcf1f0948a19c2bacc6b68d92e7778ad9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Feb 2024 17:42:44 +1100 Subject: [PATCH 0127/2290] powerpc/kasan: Limit KASAN thread size increase to 32KB [ Upstream commit f1acb109505d983779bbb7e20a1ee6244d2b5736 ] KASAN is seen to increase stack usage, to the point that it was reported to lead to stack overflow on some 32-bit machines (see link). To avoid overflows the stack size was doubled for KASAN builds in commit 3e8635fb2e07 ("powerpc/kasan: Force thread size increase with KASAN"). However with a 32KB stack size to begin with, the doubling leads to a 64KB stack, which causes build errors: arch/powerpc/kernel/switch.S:249: Error: operand out of range (0x000000000000fe50 is not between 0xffffffffffff8000 and 0x0000000000007fff) Although the asm could be reworked, in practice a 32KB stack seems sufficient even for KASAN builds - the additional usage seems to be in the 2-3KB range for a 64-bit KASAN build. So only increase the stack for KASAN if the stack size is < 32KB. Fixes: 18f14afe2816 ("powerpc/64s: Increase default stack size to 32KB") Reported-by: Spoorthy Reported-by: Benjamin Gray Reviewed-by: Benjamin Gray Link: https://lore.kernel.org/linuxppc-dev/bug-207129-206035@https.bugzilla.kernel.org%2F/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240212064244.3924505-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index af58f1ed3952e..c4b798aa6ce80 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15 #define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) #else #define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT -- GitLab From 0e01ccadfdf7be79ac99276905cb590cce632f10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:19:04 +0100 Subject: [PATCH 0128/2290] i2c: pasemi: split driver into two separate modules [ Upstream commit f44bff19268517ee98e80e944cad0f04f1db72e3 ] On powerpc, it is possible to compile test both the new apple (arm) and old pasemi (powerpc) drivers for the i2c hardware at the same time, which leads to a warning about linking the same object file twice: scripts/Makefile.build:244: drivers/i2c/busses/Makefile: i2c-pasemi-core.o is added to multiple modules: i2c-apple i2c-pasemi Rework the driver to have an explicit helper module, letting Kbuild take care of whether this should be built-in or a loadable driver. Fixes: 9bc5f4f660ff ("i2c: pasemi: Split pci driver to its own file") Signed-off-by: Arnd Bergmann Reviewed-by: Sven Peter Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/Makefile | 6 ++---- drivers/i2c/busses/i2c-pasemi-core.c | 5 +++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index e73cdb1d2b5a8..784a803279d99 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -89,10 +89,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o -i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o -obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o -i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o -obj-$(CONFIG_I2C_APPLE) += i2c-apple.o +obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o +obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PNX) += i2c-pnx.o obj-$(CONFIG_I2C_PXA) += i2c-pxa.o diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 9028ffb58cc07..f297e41352e7a 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -356,3 +356,8 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } +EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson "); +MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver"); -- GitLab From 491528935c9c48bf341d8b40eabc6c4fc5df6f2c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 14 Feb 2024 15:59:39 +0100 Subject: [PATCH 0129/2290] i2c: i801: Fix block process call transactions [ Upstream commit c1c9d0f6f7f1dbf29db996bd8e166242843a5f21 ] According to the Intel datasheets, software must reset the block buffer index twice for block process call transactions: once before writing the outgoing data to the buffer, and once again before reading the incoming data from the buffer. The driver is currently missing the second reset, causing the wrong portion of the block buffer to be read. Signed-off-by: Jean Delvare Reported-by: Piotr Zakowski Closes: https://lore.kernel.org/linux-i2c/20240213120553.7b0ab120@endymion.delvare/ Fixes: 315cd67c9453 ("i2c: i801: Add Block Write-Block Read Process Call support") Reviewed-by: Alexander Sverdlin Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-i801.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 3159ffbb77a20..9a4e9bf304c28 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -500,11 +500,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, /* Set block buffer mode */ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, return -EPROTO; data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } -- GitLab From 3e409fb74007a6d1dc85073a0eb67681bd49f6a5 Mon Sep 17 00:00:00 2001 From: Radek Krejci Date: Wed, 14 Feb 2024 10:14:07 +0100 Subject: [PATCH 0130/2290] modpost: trim leading spaces when processing source files list [ Upstream commit 5d9a16b2a4d9e8fa028892ded43f6501bc2969e5 ] get_line() does not trim the leading spaces, but the parse_source_files() expects to get lines with source files paths where the first space occurs after the file path. Fixes: 70f30cfe5b89 ("modpost: use read_text_file() and get_line() for reading text files") Signed-off-by: Radek Krejci Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/sumversion.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6bf9caca09684..a72e6cf61a1f0 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) /* Sum all files in the same dir or subdirs. */ while ((line = get_line(&pos))) { - char* p = line; + char* p; + + /* trim the leading spaces away */ + while (isspace(*line)) + line++; + p = line; if (strncmp(line, "source_", sizeof("source_")-1) == 0) { p = strrchr(line, ' '); -- GitLab From 7857e35ef10e1a9cd81204a6250f820477a17fb8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 11 Aug 2023 17:57:26 +0200 Subject: [PATCH 0131/2290] mptcp: get rid of msk->subflow commit 39880bd808ad2ddfb9b7fee129568c3b814f0609 upstream. This is a partial backport of the upstram commit 39880bd808ad ("mptcp: get rid of msk->subflow"). It's partial to avoid a long a complex dependency chain not suitable for stable. The only bit remaning from the original commit is the introduction of a new field avoid a race at close time causing an UaF: BUG: KASAN: use-after-free in mptcp_subflow_queue_clean+0x2c9/0x390 include/net/mptcp.h:104 Read of size 1 at addr ffff88803bf72884 by task syz-executor.6/23092 CPU: 0 PID: 23092 Comm: syz-executor.6 Not tainted 6.1.65-gc6114c845984 #50 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x125/0x18f lib/dump_stack.c:106 print_report+0x163/0x4f0 mm/kasan/report.c:284 kasan_report+0xc4/0x100 mm/kasan/report.c:495 mptcp_subflow_queue_clean+0x2c9/0x390 include/net/mptcp.h:104 mptcp_check_listen_stop+0x190/0x2a0 net/mptcp/protocol.c:3009 __mptcp_close+0x9a/0x970 net/mptcp/protocol.c:3024 mptcp_close+0x2a/0x130 net/mptcp/protocol.c:3089 inet_release+0x13d/0x190 net/ipv4/af_inet.c:429 sock_close+0xcf/0x230 net/socket.c:652 __fput+0x3a2/0x870 fs/file_table.c:320 task_work_run+0x24e/0x300 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop+0xa4/0xc0 kernel/entry/common.c:171 exit_to_user_mode_prepare+0x51/0x90 kernel/entry/common.c:204 syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:286 do_syscall_64+0x53/0xa0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x41d791 Code: 75 14 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 74 2a 00 00 c3 48 83 ec 08 e8 9a fc ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 e3 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00000000008bfb90 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 000000000041d791 RDX: 0000001b33920000 RSI: ffffffff8139adff RDI: 0000000000000003 RBP: 000000000079d980 R08: 0000001b33d20000 R09: 0000000000000951 R10: 000000008139a955 R11: 0000000000000293 R12: 00000000000c739b R13: 000000000079bf8c R14: 00007fa301053000 R15: 00000000000c705a Allocated by task 22528: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x40/0x70 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0xa0/0xb0 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:955 [inline] __kmalloc+0xaa/0x1c0 mm/slab_common.c:968 kmalloc include/linux/slab.h:558 [inline] sk_prot_alloc+0xac/0x200 net/core/sock.c:2038 sk_clone_lock+0x56/0x1090 net/core/sock.c:2236 inet_csk_clone_lock+0x26/0x420 net/ipv4/inet_connection_sock.c:1141 tcp_create_openreq_child+0x30/0x1910 net/ipv4/tcp_minisocks.c:474 tcp_v6_syn_recv_sock+0x413/0x1a90 net/ipv6/tcp_ipv6.c:1283 subflow_syn_recv_sock+0x621/0x1300 net/mptcp/subflow.c:730 tcp_get_cookie_sock+0xf0/0x5f0 net/ipv4/syncookies.c:201 cookie_v6_check+0x130f/0x1c50 net/ipv6/syncookies.c:261 tcp_v6_do_rcv+0x7e0/0x12b0 net/ipv6/tcp_ipv6.c:1147 tcp_v6_rcv+0x2494/0x2f50 net/ipv6/tcp_ipv6.c:1743 ip6_protocol_deliver_rcu+0xba3/0x1620 net/ipv6/ip6_input.c:438 ip6_input+0x1bc/0x470 net/ipv6/ip6_input.c:483 ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:302 __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5525 process_backlog+0x353/0x660 net/core/dev.c:5967 __napi_poll+0xc6/0x5a0 net/core/dev.c:6534 net_rx_action+0x652/0xea0 net/core/dev.c:6601 __do_softirq+0x176/0x525 kernel/softirq.c:571 Freed by task 23093: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x40/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x50 mm/kasan/generic.c:516 ____kasan_slab_free+0x13a/0x1b0 mm/kasan/common.c:236 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook mm/slub.c:1750 [inline] slab_free mm/slub.c:3661 [inline] __kmem_cache_free+0x1eb/0x340 mm/slub.c:3674 sk_prot_free net/core/sock.c:2074 [inline] __sk_destruct+0x4ad/0x620 net/core/sock.c:2160 tcp_v6_rcv+0x269c/0x2f50 net/ipv6/tcp_ipv6.c:1761 ip6_protocol_deliver_rcu+0xba3/0x1620 net/ipv6/ip6_input.c:438 ip6_input+0x1bc/0x470 net/ipv6/ip6_input.c:483 ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:302 __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5525 process_backlog+0x353/0x660 net/core/dev.c:5967 __napi_poll+0xc6/0x5a0 net/core/dev.c:6534 net_rx_action+0x652/0xea0 net/core/dev.c:6601 __do_softirq+0x176/0x525 kernel/softirq.c:571 The buggy address belongs to the object at ffff88803bf72000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 2180 bytes inside of 4096-byte region [ffff88803bf72000, ffff88803bf73000) The buggy address belongs to the physical page: page:00000000a72e4e51 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x3bf70 head:00000000a72e4e51 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x100000000010200(slab|head|node=0|zone=1) raw: 0100000000010200 ffffea0000a0ea00 dead000000000002 ffff888100042140 raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88803bf72780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803bf72800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88803bf72880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88803bf72900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803bf72980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Prevent the MPTCP worker from freeing the first subflow for unaccepted socket when such sockets transition to TCP_CLOSE state, and let that happen at accept() or listener close() time. Fixes: b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Reported-by: Christoph Paasch Tested-by: Christoph Paasch Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 9 ++++----- net/mptcp/protocol.h | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 76539d1004ebb..db5369b6442d0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2422,7 +2422,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out_release; } - dispose_it = !msk->subflow || ssk != msk->subflow->sk; + dispose_it = msk->free_first || ssk != msk->first; if (dispose_it) list_del(&subflow->node); @@ -2440,7 +2440,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { __mptcp_subflow_disconnect(ssk, subflow, flags); - msk->subflow->state = SS_UNCONNECTED; release_sock(ssk); goto out; @@ -3341,10 +3340,10 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ mptcp_dispose_initial_subflow(msk); + + /* allow the following to close even the initial subflow */ + msk->free_first = 1; mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4ec8e0a81b5a4..e5d553dfc13fd 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -287,7 +287,8 @@ struct mptcp_sock { cork:1, nodelay:1, fastopening:1, - in_accept_queue:1; + in_accept_queue:1, + free_first:1; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; -- GitLab From 6673d9f1c2cd984390550dbdf7d5ae07b20abbf8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 22:49:46 +0100 Subject: [PATCH 0132/2290] mptcp: fix data re-injection from stale subflow commit b6c620dc43ccb4e802894e54b651cf81495e9598 upstream. When the MPTCP PM detects that a subflow is stale, all the packet scheduler must re-inject all the mptcp-level unacked data. To avoid acquiring unneeded locks, it first try to check if any unacked data is present at all in the RTX queue, but such check is currently broken, as it uses TCP-specific helper on an MPTCP socket. Funnily enough fuzzers and static checkers are happy, as the accessed memory still belongs to the mptcp_sock struct, and even from a functional perspective the recovery completed successfully, as the short-cut test always failed. A recent unrelated TCP change - commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") - exposed the issue, as the tcp field reorganization makes the mptcp code always skip the re-inection. Fix the issue dropping the bogus call: we are on a slow path, the early optimization proved once again to be evil. Fixes: 1e1d9d6f119c ("mptcp: handle pending data on closed subflow") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/468 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-1-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index db5369b6442d0..0bff0105093f6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2336,9 +2336,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(mptcp_sk(sk))) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue -- GitLab From 66e142fbe13fa57ea305a7ba1f60297d0a467f44 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:47 +0100 Subject: [PATCH 0133/2290] selftests: mptcp: add missing kconfig for NF Filter commit 3645c844902bd4e173d6704fc2a37e8746904d67 upstream. Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table. It is then required to have IP_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae8..2a00bf4acdfa8 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,6 +22,7 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_ACT_CSUM=m -- GitLab From 2c7337ec22bc1a34f2fc4b4c2ae0f5e4b1a45b63 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:48 +0100 Subject: [PATCH 0134/2290] selftests: mptcp: add missing kconfig for NF Filter in v6 commit 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa upstream. Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table for IPv6. It is then required to have IP6_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-3-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2a00bf4acdfa8..26fe466f803dd 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -25,6 +25,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y -- GitLab From fc0e9cff9db02701dcb09bb70cf1531abab9b422 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:49 +0100 Subject: [PATCH 0135/2290] selftests: mptcp: add missing kconfig for NF Mangle commit 2d41f10fa497182df9012d3e95d9cea24eb42e61 upstream. Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Mangle table, only in IPv4. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-4-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 26fe466f803dd..4f80014cae494 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -23,6 +23,7 @@ CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m -- GitLab From d50d031919b259e49957616f3e79b72a7e7b99aa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:50 +0100 Subject: [PATCH 0136/2290] selftests: mptcp: increase timeout to 30 min commit 4d4dfb2019d7010efb65926d9d1c1793f9a367c6 upstream. On very slow environments -- e.g. when QEmu is used without KVM --, mptcp_join.sh selftest can take a bit more than 20 minutes. Bump the default timeout by 50% as it seems normal to take that long on some environments. When a debug kernel config is used, this selftest will take even longer, but that's certainly not a common test env to consider for the timeout. The Fixes tag that has been picked here is there simply to help having this patch backported to older stable versions. It is difficult to point to the exact commit that made some env reaching the timeout from time to time. Fixes: d17b968b9876 ("selftests: mptcp: increase timeout to 20 minutes") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-5-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db6..abc5648b59abd 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 -- GitLab From d288d2e3e65a3a3b7b4e0eb708c394145e5120d5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:49 +0100 Subject: [PATCH 0137/2290] mptcp: drop the push_pending field commit bdd70eb68913c960acb895b00a8c62eb64715b1f upstream. Such field is there to avoid acquiring the data lock in a few spots, but it adds complexity to the already non trivial locking schema. All the relevant call sites (mptcp-level re-injection, set socket options), are slow-path, drop such field in favor of 'cb_flags', adding the relevant locking. This patch could be seen as an improvement, instead of a fix. But it simplifies the next patch. The 'Fixes' tag has been added to help having this series backported to stable. Fixes: e9d09baca676 ("mptcp: avoid atomic bit manipulation when possible") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 12 ++++++------ net/mptcp/protocol.h | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0bff0105093f6..859b18cb8e4f6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1582,8 +1582,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, void mptcp_check_and_set_pending(struct sock *sk) { - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); + if (mptcp_send_head(sk)) { + mptcp_data_lock(sk); + mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); + mptcp_data_unlock(sk); + } } void __mptcp_push_pending(struct sock *sk, unsigned int flags) @@ -3140,7 +3143,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->last_snd = NULL; WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; - msk->push_pending = 0; msk->recovery = false; msk->can_ack = false; msk->fully_established = false; @@ -3384,8 +3386,7 @@ static void mptcp_release_cb(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); struct list_head join_list; if (!flags) @@ -3401,7 +3402,6 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e5d553dfc13fd..259672cc344f3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -272,7 +272,6 @@ struct mptcp_sock { int rmem_released; unsigned long flags; unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; -- GitLab From e373bfc8ec3d6496ec7e11dd7f4d087a44b1009a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 8 Feb 2024 19:03:53 +0100 Subject: [PATCH 0138/2290] mptcp: check addrs list in userspace_pm_get_local_id commit f012d796a6de662692159c539689e47e662853a8 upstream. Before adding a new entry in mptcp_userspace_pm_get_local_id(), it's better to check whether this address is already in userspace pm local address list. If it's in the list, no need to add a new entry, just return it's address ID and use this address. Fixes: 8b20137012d9 ("mptcp: read attributes of addr entries managed by userspace PMs") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 38cbdc66d8bff..2e1e0d0e3ec60 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -132,10 +132,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry new_entry; + struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&e->addr, skc, false)) { + entry = e; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; -- GitLab From 75500e7ba2a2719ff8a6721a5ac17954eae55bf9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 18 Dec 2023 08:54:00 +0100 Subject: [PATCH 0139/2290] media: Revert "media: rkisp1: Drop IRQF_SHARED" commit a107d643b2a3382e0a2d2c4ef08bf8c6bff4561d upstream. This reverts commit 85d2a31fe4d9be1555f621ead7a520d8791e0f74. The rkisp1 does share interrupt lines on some platforms, after all. Thus we need to revert this, and implement a fix for the rkisp1 shared irq handling in a follow-up patch. Closes: https://lore.kernel.org/all/87o7eo8vym.fsf@gmail.com/ Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-1-173007628248@ideasonboard.com Reported-by: Mikhail Rudenko Signed-off-by: Tomi Valkeinen Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index aeb6bb63667eb..41abb18b00acb 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -559,7 +559,7 @@ static int rkisp1_probe(struct platform_device *pdev) rkisp1->irqs[il] = irq; } - ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0, + ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED, dev_driver_string(dev), dev); if (ret) { dev_err(dev, "request irq failed: %d\n", ret); -- GitLab From 6bb22ac1d11d7d20f91e7fd2e657a9e5f6db65e0 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 9 Feb 2024 10:07:34 -0800 Subject: [PATCH 0140/2290] scsi: Revert "scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock" commit 977fe773dcc7098d8eaf4ee6382cb51e13e784cb upstream. This reverts commit 1a1975551943f681772720f639ff42fbaa746212. This commit causes interrupts to be lost for FCoE devices, since it changed sping locks from "bh" to "irqsave". Instead, a work queue should be used, and will be addressed in a separate commit. Fixes: 1a1975551943 ("scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock") Signed-off-by: Lee Duncan Link: https://lore.kernel.org/r/c578cdcd46b60470535c4c4a953e6a1feca0dffd.1707500786.git.lduncan@suse.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fcoe/fcoe_ctlr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 8a4124e7d2043..ddc048069af25 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip) { struct fcoe_fcf *sel; struct fcoe_fcf *fcf; - unsigned long flags; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = NULL; list_for_each_entry(fcf, &fip->fcfs, list) fcf->flogi_sent = 0; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); sel = fip->sel_fcf; if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr)) @@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, { struct fc_frame *fp; struct fc_frame_header *fh; - unsigned long flags; u16 old_xid; u8 op; u8 mac[ETH_ALEN]; @@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, op = FIP_DT_FLOGI; if (fip->mode == FIP_MODE_VN2VN) break; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = skb; fip->flogi_req_send = 1; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); schedule_work(&fip->timer_work); return -EINPROGRESS; case ELS_FDISC: @@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip) static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; int error; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n"); fcf = fcoe_ctlr_select(fip); if (!fcf || fcf->flogi_sent) { @@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) fcoe_ctlr_solicit(fip, NULL); error = fcoe_ctlr_flogi_send_locked(fip); } - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); mutex_unlock(&fip->ctlr_mutex); return error; } @@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); fcf = fip->sel_fcf; if (!fcf || !fip->flogi_req_send) goto unlock; @@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) } else /* XXX */ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n"); unlock: - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); } /** -- GitLab From ff70e6ff6fc2413caf33410af7462d1f584d927e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Feb 2024 23:52:54 -0600 Subject: [PATCH 0141/2290] Revert "drm/amd: flush any delayed gfxoff on suspend entry" commit 916361685319098f696b798ef1560f69ed96e934 upstream. commit ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") caused GFXOFF control to be used more heavily and the codepath that was removed from commit 0dee72639533 ("drm/amd: flush any delayed gfxoff on suspend entry") now can be exercised at suspend again. Users report that by using GNOME to suspend the lockscreen trigger will cause SDMA traffic and the system can deadlock. This reverts commit 0dee726395333fea833eaaf838bc80962df886c8. Acked-by: Alex Deucher Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 ++++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4b91f95066eca..6a4749c0c5a58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4203,7 +4203,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 23f0067f92e4e..b803e785d3aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -585,8 +585,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { -- GitLab From dc3890441c9eb21b28d9831b269d394f4674ab29 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jan 2024 19:14:14 +0100 Subject: [PATCH 0142/2290] drm/virtio: Set segment size for virtio_gpu device commit 9c64e749cebd9c2d3d55261530a98bcccb83b950 upstream. Set the segment size of the virtio_gpu device to the value used by the drm helpers when allocating sg lists to fix the following complaint from DMA_API debug code: DMA-API: virtio-pci 0000:07:00.0: mapping sg segment longer than device claims to support [len=262144] [max=65536] Cc: stable@vger.kernel.org Tested-by: Zhenyu Zhang Acked-by: Vivek Kasireddy Signed-off-by: Sebastian Ott Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/7258a4cc-da16-5c34-a042-2a23ee396d56@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 0035affc3e590..9b2d235168bb6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -93,6 +93,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; -- GitLab From d0302e2a5732fdc9706bc5d21ac349a58f6f7d15 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 11:44:03 +0100 Subject: [PATCH 0143/2290] lsm: fix the logic in security_inode_getsecctx() commit 99b817c173cd213671daecd25ca27f56b0c7c4ec upstream. The inode_getsecctx LSM hook has previously been corrected to have -EOPNOTSUPP instead of 0 as the default return value to fix BPF LSM behavior. However, the call_int_hook()-generated loop in security_inode_getsecctx() was left treating 0 as the neutral value, so after an LSM returns 0, the loop continues to try other LSMs, and if one of them returns a non-zero value, the function immediately returns with said value. So in a situation where SELinux and the BPF LSMs registered this hook, -EOPNOTSUPP would be incorrectly returned whenever SELinux returned 0. Fix this by open-coding the call_int_hook() loop and making it use the correct LSM_RET_DEFAULT() value as the neutral one, similar to what other hooks do. Cc: stable@vger.kernel.org Reported-by: Stephen Smalley Link: https://lore.kernel.org/selinux/CAEjxPJ4ev-pasUwGx48fDhnmjBnq_Wh90jYPwRQRAqXxmOKD4Q@mail.gmail.com/ Link: https://bugzilla.redhat.com/show_bug.cgi?id=2257983 Fixes: b36995b8609a ("lsm: fix default return value for inode_getsecctx") Signed-off-by: Ondrej Mosnacek Reviewed-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/security.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/security/security.c b/security/security.c index fc15b963e1028..1b504c296551c 100644 --- a/security/security.c +++ b/security/security.c @@ -2186,7 +2186,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); -- GitLab From e1aae84f42ecaca62afff24aa741ca3b7a8c2906 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 0144/2290] firewire: core: correct documentation of fw_csr_string() kernel API commit 5f9ab17394f831cb7986ec50900fa37507a127f1 upstream. Against its current description, the kernel API can accepts all types of directory entries. This commit corrects the documentation. Cc: stable@vger.kernel.org Fixes: 3c2c58cb33b3 ("firewire: core: fw_csr_string addendum") Link: https://lore.kernel.org/r/20240130100409.30128-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/core-device.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 74bab06283b71..1879ec27c0236 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -100,10 +100,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ -- GitLab From 38acb2e9be54afbc2914ff4eaf3b4b2ab5e67320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Relvas?= Date: Wed, 31 Jan 2024 11:34:09 +0000 Subject: [PATCH 0145/2290] ALSA: hda/realtek: Apply headset jack quirk for non-bass alc287 thinkpads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2468e8922d2f6da81a6192b73023eff67e3fefdd upstream. There currently exists two thinkpad headset jack fixups: ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK ALC285_FIXUP_THINKPAD_HEADSET_JACK The latter is applied to alc285 and alc287 thinkpads which contain bass speakers. However, the former was only being applied to alc285 thinkpads, leaving non-bass alc287 thinkpads with no headset button controls. This patch fixes that by adding ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK to the alc287 chains, allowing the detection of headset buttons. Signed-off-by: José Relvas Cc: Link: https://lore.kernel.org/r/20240131113407.34698-3-josemonsantorelvas@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 274f528eba0ba..0c386820861cf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9377,7 +9377,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC245_FIXUP_HP_MUTE_LED_COEFBIT] = { .type = HDA_FIXUP_FUNC, @@ -9392,6 +9392,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_THINKPAD_I2S_SPK] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, -- GitLab From 76690354e6ace5ed2e0c579906bce44d7ccc9e4e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 12 Feb 2024 19:05:10 -0700 Subject: [PATCH 0146/2290] kbuild: Fix changing ELF file type for output of gen_btf for big endian commit e3a9ee963ad8ba677ca925149812c5932b49af69 upstream. Commit 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") changed the ELF type of .btf.vmlinux.bin.o to ET_REL via dd, which works fine for little endian platforms: 00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 03 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| +00000010 01 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| However, for big endian platforms, it changes the wrong byte, resulting in an invalid ELF file type, which ld.lld rejects: 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 01 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: : 103 ld.lld: error: .btf.vmlinux.bin.o: unknown file type Fix this by updating the entire 16-bit e_type field rather than just a single byte, so that everything works correctly for all platforms and linkers. 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 00 01 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: REL (Relocatable file) While in the area, update the comment to mention that binutils 2.35+ matches LLD's behavior of rejecting an ET_EXEC input, which occurred after the comment was added. Cc: stable@vger.kernel.org Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Link: https://github.com/llvm/llvm-project/pull/75643 Suggested-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Reviewed-by: Nicolas Schier Reviewed-by: Kees Cook Reviewed-by: Justin Stitt Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/link-vmlinux.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943cf03..458b2948b580d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -135,8 +135,13 @@ gen_btf() ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \ --strip-all ${1} ${2} 2>/dev/null # Change e_type to ET_REL so that it can be used to link final vmlinux. - # Unlike GNU ld, lld does not allow an ET_EXEC input. - printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none + # GNU ld 2.35+ and lld do not allow an ET_EXEC input. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + et_rel='\0\1' + else + et_rel='\1\0' + fi + printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none } # Create ${2} .S file with all symbols from the ${1} object file -- GitLab From 5c0c5ffaed73cbae6c317374dc32ba6cacc60895 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 25 Jan 2024 12:53:09 +0300 Subject: [PATCH 0147/2290] nfc: nci: free rx_data_reassembly skb on NCI device cleanup commit bfb007aebe6bff451f7f3a4be19f4f286d0d5d9c upstream. rx_data_reassembly skb is stored during NCI data exchange for processing fragmented packets. It is dropped only when the last fragment is processed or when an NTF packet with NCI_OP_RF_DEACTIVATE_NTF opcode is received. However, the NCI device may be deallocated before that which leads to skb leak. As by design the rx_data_reassembly skb is bound to the NCI device and nothing prevents the device to be freed before the skb is processed in some way and cleaned, free it on the NCI device cleanup. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Cc: stable@vger.kernel.org Reported-by: syzbot+6b7c68d9c21e4ee4251b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000f43987060043da7b@google.com/ Signed-off-by: Fedor Pchelkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7535afd1537e9..b5071a2f597d4 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1207,6 +1207,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); -- GitLab From 56440799fc4621c279df16176f83a995d056023a Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jan 2024 02:21:47 -0800 Subject: [PATCH 0148/2290] net: hsr: remove WARN_ONCE() in send_hsr_supervision_frame() commit 37e8c97e539015637cb920d3e6f1e404f707a06e upstream. Syzkaller reported [1] hitting a warning after failing to allocate resources for skb in hsr_init_skb(). Since a WARN_ONCE() call will not help much in this case, it might be prudent to switch to netdev_warn_once(). At the very least it will suppress syzkaller reports such as [1]. Just in case, use netdev_warn_once() in send_prp_supervision_frame() for similar reasons. [1] HSR: Could not send supervision frame WARNING: CPU: 1 PID: 85 at net/hsr/hsr_device.c:294 send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 RIP: 0010:send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 ... Call Trace: hsr_announce+0x114/0x370 net/hsr/hsr_device.c:382 call_timer_fn+0x193/0x590 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x764/0xb20 kernel/time/timer.c:2022 run_timer_softirq+0x58/0xd0 kernel/time/timer.c:2035 __do_softirq+0x21a/0x8de kernel/softirq.c:553 invoke_softirq kernel/softirq.c:427 [inline] __irq_exit_rcu kernel/softirq.c:632 [inline] irq_exit_rcu+0xb7/0x120 kernel/softirq.c:644 sysvec_apic_timer_interrupt+0x95/0xb0 arch/x86/kernel/apic/apic.c:1076 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:649 ... This issue is also found in older kernels (at least up to 5.10). Cc: stable@vger.kernel.org Reported-by: syzbot+3ae0a3f42c84074b7c8e@syzkaller.appspotmail.com Fixes: 121c33b07b31 ("net: hsr: introduce common code for skb initialization") Signed-off-by: Nikita Zhandarovich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b1e86a7265b32..83906d093f0ae 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -291,7 +291,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -338,7 +338,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } -- GitLab From 55e891f4a27269847b151b6bfd77ac720408d176 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:41 +0100 Subject: [PATCH 0149/2290] net: stmmac: do not clear TBS enable bit on link up/down commit 4896bb7c0b31a0a3379b290ea7729900c59e0c69 upstream. With the dma conf being reallocated on each call to stmmac_open(), any information in there is lost, unless we specifically handle it. The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc therefore would stop working when link was set down and then back up. Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Cc: stable@vger.kernel.org Signed-off-by: Esben Haabendal Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e988a60c8561b..66178ce6d000e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3826,6 +3826,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); -- GitLab From 6286435cd06c560d46bcc22e5d6b411eec464c43 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 29 Jan 2024 14:03:08 +0100 Subject: [PATCH 0150/2290] xen-netback: properly sync TX responses commit 7b55984c96ffe9e236eb9c82a2196e0b1f84990d upstream. Invoking the make_tx_response() / push_tx_responses() pair with no lock held would be acceptable only if all such invocations happened from the same context (NAPI instance or dealloc thread). Since this isn't the case, and since the interface "spec" also doesn't demand that multicast operations may only be performed with no in-flight transmits, MCAST_{ADD,DEL} processing also needs to acquire the response lock around the invocations. To prevent similar mistakes going forward, "downgrade" the present functions to private helpers of just the two remaining ones using them directly, with no forward declarations anymore. This involves renaming what so far was make_tx_response(), for the new function of that name to serve the new (wrapper) purpose. While there, - constify the txp parameters, - correct xenvif_idx_release()'s status parameter's type, - rename {,_}make_tx_response()'s status parameters for consistency with xenvif_idx_release()'s. Fixes: 210c34dcd8d9 ("xen-netback: add support for multicast control") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/980c6c3d-e10e-4459-8565-e8fbde122f00@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 84 +++++++++++++++---------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 2716040985748..0d51c900c5538 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -997,7 +981,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1009,7 +992,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1444,8 +1426,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + RING_IDX i = queue->tx.rsp_prod_pvt; + struct xen_netif_tx_response *resp; + + resp = RING_GET_RESPONSE(&queue->tx, i); + resp->id = txp->id; + resp->status = status; + + while (extra_count-- != 0) + RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + + queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +} + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) + s8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; @@ -1455,8 +1464,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_lock_irqsave(&queue->response_lock, flags); - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); /* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the @@ -1470,32 +1479,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_unlock_irqrestore(&queue->response_lock, flags); } - static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { - RING_IDX i = queue->tx.rsp_prod_pvt; - struct xen_netif_tx_response *resp; - - resp = RING_GET_RESPONSE(&queue->tx, i); - resp->id = txp->id; - resp->status = st; - - while (extra_count-- != 0) - RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + unsigned long flags; - queue->tx.rsp_prod_pvt = ++i; -} + spin_lock_irqsave(&queue->response_lock, flags); -static void push_tx_responses(struct xenvif_queue *queue) -{ - int notify; + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); + spin_unlock_irqrestore(&queue->response_lock, flags); } static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) -- GitLab From a5767decf74343fd4808b751c100161e2b489ffe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Jun 2023 18:41:59 +0900 Subject: [PATCH 0151/2290] modpost: propagate W=1 build option to modpost commit 20ff36856fe00879f82de71fe6f1482ca1b72334 upstream. "No build warning" is a strong requirement these days, so you must fix all issues before enabling a new warning flag. We often add a new warning to W=1 first so that the kbuild test robot blocks new breakages. This commit allows modpost to show extra warnings only when W=1 (or KBUILD_EXTRA_WARN=1) is given. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile.modpost | 1 + scripts/mod/modpost.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index e41dee64d429c..39aea753d0bdc 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -44,6 +44,7 @@ modpost-args = \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ + $(if $(findstring 1, $(KBUILD_EXTRA_WARN)),-W) \ -o $@ # 'make -i -k' ignores compile errors, and builds as many modules as possible. diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e6be7fc2625fd..9ef2f6423e808 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -41,6 +41,8 @@ static bool allow_missing_ns_imports; static bool error_occurred; +static bool extra_warn; + /* * Cut off the warnings when there are too many. This typically occurs when * vmlinux is missing. ('make modules' without building vmlinux.) @@ -2290,7 +2292,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:mnT:o:awENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:mnT:o:aWwENd:")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2315,6 +2317,9 @@ int main(int argc, char **argv) case 'T': files_source = optarg; break; + case 'W': + extra_warn = true; + break; case 'w': warn_unresolved = true; break; -- GitLab From 999ecc936a99cdd3392a652fa3686e74bae5d671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 30 Sep 2023 18:52:04 +0200 Subject: [PATCH 0152/2290] modpost: Don't let "driver"s reference .exit.* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f177cd0c15fcc7bdbb68d8d1a3166dead95314c8 upstream. Drivers must not reference functions marked with __exit as these likely are not available when the code is built-in. There are few creative offenders uncovered for example in ARCH=amd64 allmodconfig builds. So only trigger the section mismatch warning for W=1 builds. The dual rule that drivers must not reference .init.* is implemented since commit 0db252452378 ("modpost: don't allow *driver to reference .init.*") which however missed that .exit.* should be handled in the same way. Thanks to Masahiro Yamada and Arnd Bergmann who gave valuable hints to find this improvement. Signed-off-by: Uwe Kleine-König Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 9ef2f6423e808..9a4a220ecbc26 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1087,9 +1087,20 @@ static int secref_whitelist(const struct sectioncheck *mismatch, "*_console"))) return 0; - /* symbols in data sections that may refer to meminit/exit sections */ + /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_EXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_XXXEXIT_SECTIONS)) && + match(fromsym, PATTERNS("*driver"))) + return 0; + + /* + * symbols in data sections must not refer to .exit.*, but there are + * quite a few offenders, so hide these unless for W=1 builds until + * these are fixed. + */ + if (!extra_warn && + match(fromsec, PATTERNS(DATA_SECTIONS)) && + match(tosec, PATTERNS(EXIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; -- GitLab From 519b7da44ee4d84402522e860642a20183cb86db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 23 Oct 2023 02:06:05 +0900 Subject: [PATCH 0153/2290] linux/init: remove __memexit* annotations commit 6a4e59eeedc3018cb57722eecfcbb49431aeb05f upstream. We have never used __memexit, __memexitdata, or __memexitconst. These were unneeded. Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann [nathan: Remove additional case of XXXEXIT_TO_SOME_EXIT due to lack of 78dac1a22944 in 6.1] Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 6 ------ include/linux/init.h | 3 --- scripts/mod/modpost.c | 16 +++------------- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7ad6f51b3d914..1d1f480a5e9e4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -351,7 +351,6 @@ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data*) \ - MEM_KEEP(exit.data*) \ *(.data.unlikely) \ __start_once = .; \ *(.data.once) \ @@ -546,7 +545,6 @@ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ MEM_KEEP(init.rodata) \ - MEM_KEEP(exit.rodata) \ } \ \ /* Built-in module parameters. */ \ @@ -601,7 +599,6 @@ *(.ref.text) \ *(.text.asan.* .text.tsan.*) \ MEM_KEEP(init.text*) \ - MEM_KEEP(exit.text*) \ /* sched.text is aling to function alignment to secure we have same @@ -751,13 +748,10 @@ *(.exit.data .exit.data.*) \ *(.fini_array .fini_array.*) \ *(.dtors .dtors.*) \ - MEM_DISCARD(exit.data*) \ - MEM_DISCARD(exit.rodata*) #define EXIT_TEXT \ *(.exit.text) \ *(.text.exit) \ - MEM_DISCARD(exit.text) #define EXIT_CALL \ *(.exitcall.exit) diff --git a/include/linux/init.h b/include/linux/init.h index 077d7f93b402f..c96aea3229ca1 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -87,9 +87,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 9a4a220ecbc26..61a13d55c0e31 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -811,7 +811,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_TEXT_SECTIONS \ ".init.text", ".meminit.text" #define ALL_EXIT_TEXT_SECTIONS \ - ".exit.text", ".memexit.text" + ".exit.text" #define ALL_PCI_INIT_SECTIONS \ ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ @@ -819,10 +819,9 @@ static void check_section(const char *modname, struct elf_info *elf, ".pci_fixup_resume_early", ".pci_fixup_suspend" #define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS -#define ALL_XXXEXIT_SECTIONS MEM_EXIT_SECTIONS #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS -#define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS +#define ALL_EXIT_SECTIONS EXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ @@ -835,7 +834,6 @@ static void check_section(const char *modname, struct elf_info *elf, #define MEM_INIT_SECTIONS ".meminit.*" #define EXIT_SECTIONS ".exit.*" -#define MEM_EXIT_SECTIONS ".memexit.*" #define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \ TEXT_SECTIONS, OTHER_TEXT_SECTIONS @@ -864,7 +862,6 @@ enum mismatch { TEXT_TO_ANY_EXIT, DATA_TO_ANY_EXIT, XXXINIT_TO_SOME_INIT, - XXXEXIT_TO_SOME_EXIT, ANY_INIT_TO_ANY_EXIT, ANY_EXIT_TO_ANY_INIT, EXPORT_TO_INIT_EXIT, @@ -939,12 +936,6 @@ static const struct sectioncheck sectioncheck[] = { .bad_tosec = { INIT_SECTIONS, NULL }, .mismatch = XXXINIT_TO_SOME_INIT, }, -/* Do not reference exit code/data from memexit code/data */ -{ - .fromsec = { ALL_XXXEXIT_SECTIONS, NULL }, - .bad_tosec = { EXIT_SECTIONS, NULL }, - .mismatch = XXXEXIT_TO_SOME_EXIT, -}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, @@ -1089,7 +1080,7 @@ static int secref_whitelist(const struct sectioncheck *mismatch, /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_XXXEXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; @@ -1267,7 +1258,6 @@ static void report_sec_mismatch(const char *modname, case TEXT_TO_ANY_EXIT: case DATA_TO_ANY_EXIT: case XXXINIT_TO_SOME_INIT: - case XXXEXIT_TO_SOME_EXIT: case ANY_INIT_TO_ANY_EXIT: case ANY_EXIT_TO_ANY_INIT: warn("%s: section mismatch in reference: %s (section: %s) -> %s (section: %s)\n", -- GitLab From 36fbcadc208e968008ed3ffb89cb2a265c4f276c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Dec 2022 11:35:29 -0700 Subject: [PATCH 0154/2290] modpost: Include '.text.*' in TEXT_SECTIONS commit 19331e84c3873256537d446afec1f6c507f8c4ef upstream. Commit 6c730bfc894f ("modpost: handle -ffunction-sections") added ".text.*" to the OTHER_TEXT_SECTIONS macro to fix certain section mismatch warnings. Unfortunately, this makes it impossible for modpost to warn about section mismatches with LTO, which implies '-ffunction-sections', as all functions are put in their own '.text.' sections, which may still reference functions in sections they are not supposed to, such as __init. Fix this by moving ".text.*" into TEXT_SECTIONS, so that configurations with '-ffunction-sections' will see warnings about mismatched sections. Link: https://lore.kernel.org/Y39kI3MOtVI5BAnV@google.com/ Reported-by: Vincent Donnefort Reviewed-and-tested-by: Alexander Lobakin Reviewed-by: Sami Tolvanen Tested-by: Vincent Donnefort Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 61a13d55c0e31..233f8eae09a67 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -824,10 +824,10 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_EXIT_SECTIONS EXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" -#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ +#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" #define INIT_SECTIONS ".init.*" -- GitLab From 064cb9dd10ffd637c212ca9afa3617961975d1fb Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:54 -0700 Subject: [PATCH 0155/2290] um: Fix adding '-no-pie' for clang commit 846cfbeed09b45d985079a9173cf390cc053715b upstream. The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link vmlinux with -no-pie") added the compiler linker flag '-no-pie' via cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer. While this works for GCC, this does not work for clang because cc-option uses '-c', which stops the pipeline right before linking, so '-no-pie' is unconsumed and clang warns, causing cc-option to fail just as it would if the option was entirely unsupported: $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument] A recent version of clang exposes this because it generates a relocation under '-mcmodel=large' that is not supported in PIE mode: /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE /usr/sbin/ld: failed to set dynamic section sizes: bad value clang: error: linker command failed with exit code 1 (use -v to see invocation) Remove the cc-option check altogether. It is wasteful to invoke the compiler to check for '-no-pie' because only one supported compiler version does not support it, GCC 5.x (as it is supported with the minimum version of clang and GCC 6.1.0+). Use a combination of the gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add '-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all compilers that support this. Furthermore, using gcc-min-version can help turn this back into LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie when the minimum version of GCC is bumped past 6.1.0. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1982 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/um/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 3dbd0e3b660ea..778c50f273992 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -118,7 +118,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ -- GitLab From 5953f2c7f84d633fd9cbe5975c298fa04308bcb5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:55 -0700 Subject: [PATCH 0156/2290] modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS commit 397586506c3da005b9333ce5947ad01e8018a3be upstream. After the linked LLVM change, building ARCH=um defconfig results in a segmentation fault in modpost. Prior to commit a23e7584ecf3 ("modpost: unify 'sym' and 'to' in default_mismatch_handler()"), there was a warning: WARNING: modpost: vmlinux.o(__ex_table+0x88): Section mismatch in reference to the .ltext:(unknown) WARNING: modpost: The relocation at __ex_table+0x88 references section ".ltext" which is not in the list of authorized sections. If you're adding a new section and/or if this reference is valid, add ".ltext" to the list of authorized sections to jump to on fault. This can be achieved by adding ".ltext" to OTHER_TEXT_SECTIONS in scripts/mod/modpost.c. The linked LLVM change moves global objects to the '.ltext' (and '.ltext.*' with '-ffunction-sections') sections with '-mcmodel=large', which ARCH=um uses. These sections should be handled just as '.text' and '.text.*' are, so add them to TEXT_SECTIONS. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1981 Link: https://github.com/llvm/llvm-project/commit/4bf8a688956a759b7b6b8d94f42d25c13c7af130 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 233f8eae09a67..686eed37f9781 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -825,7 +825,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" -- GitLab From d27f6d6eacacd8d25584e8c5cc59cfaebc81ca70 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Thu, 1 Feb 2024 09:21:14 -0300 Subject: [PATCH 0157/2290] ALSA: hda/realtek: Enable headset mic on Vaio VJFE-ADL commit c7de2d9bb68a5fc71c25ff96705a80a76c8436eb upstream. Vaio VJFE-ADL is equipped with ALC269VC, and it needs ALC298_FIXUP_SPK_VOLUME quirk to make its headset mic work. Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20240201122114.30080-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0c386820861cf..6e94161e46ce1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10053,6 +10053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), -- GitLab From a6bc85847272c4d7bd887ab9abdcd7213ff5dee7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:06 +0100 Subject: [PATCH 0158/2290] ASoC: codecs: wcd938x: handle deferred probe commit 086df711d9b886194481b4fbe525eb43e9ae7403 upstream. WCD938x sound codec driver ignores return status of getting regulators and returns EINVAL instead of EPROBE_DEFER. If regulator provider probes after the codec, system is left without probed audio: wcd938x_codec audio-codec: wcd938x_probe: Fail to obtain platform data wcd938x_codec: probe of audio-codec failed with error -22 Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a2abd1a111612..e80be4e4fa8b4 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3588,7 +3588,7 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_populate_dt_data(wcd938x, dev); if (ret) { dev_err(dev, "%s: Fail to obtain platform data\n", __func__); - return -EINVAL; + return ret; } ret = wcd938x_add_slave_components(wcd938x, dev, &match); -- GitLab From 6c65eb988d4adff9f3a472d8484de4503641ee43 Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Mon, 22 Jan 2024 18:47:10 +0000 Subject: [PATCH 0159/2290] ALSA: hda/cs8409: Suppress vmaster control for Dolphin models commit a2ed0a44d637ef9deca595054c206da7d6cbdcbc upstream. Customer has reported an issue with specific desktop platform where two CS42L42 codecs are connected to CS8409 HDA bridge. If "Master Volume Control" is created then on Ubuntu OS UCM left/right balance slider in UI audio settings has no effect. This patch will fix this issue for a target paltform. Fixes: 20e507724113 ("ALSA: hda/cs8409: Add support for dolphin") Signed-off-by: Vitaly Rodionov Cc: Link: https://lore.kernel.org/r/20240122184710.5802-1-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cs8409.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 627899959ffe8..e41316e2e9833 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1; spec->scodecs[CS8409_CODEC1]->codec = codec; spec->num_scodecs = 2; + spec->gen.suppress_vmaster = 1; codec->patch_ops = cs8409_dolphin_patch_ops; -- GitLab From f33789ca65d5e93c40389caffac927aa712d22a8 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Mon, 22 Jan 2024 15:48:24 +0800 Subject: [PATCH 0160/2290] ALSA: hda/realtek: fix mute/micmute LEDs for HP ZBook Power commit 1513664f340289cf10402753110f3cff12a738aa upstream. The HP ZBook Power using ALC236 codec which using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240122074826.1020964-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6e94161e46ce1..153a9605d3a97 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9726,6 +9726,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), -- GitLab From 90e09c016d72b91e76de25f71c7b93d94cc3c769 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Wed, 31 Jan 2024 21:53:46 +0000 Subject: [PATCH 0161/2290] binder: signal epoll threads of self-work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 97830f3c3088638ff90b20dfba2eb4d487bf14d7 upstream. In (e)poll mode, threads often depend on I/O events to determine when data is ready for consumption. Within binder, a thread may initiate a command via BINDER_WRITE_READ without a read buffer and then make use of epoll_wait() or similar to consume any responses afterwards. It is then crucial that epoll threads are signaled via wakeup when they queue their own work. Otherwise, they risk waiting indefinitely for an event leaving their work unhandled. What is worse, subsequent commands won't trigger a wakeup either as the thread has pending work. Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Cc: Arve Hjønnevåg Cc: Martijn Coenen Cc: Alice Ryhl Cc: Steven Moreland Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20240131215347.1808751-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d933ef6cc65af..55cd17a13e758 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -477,6 +477,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread, { WARN_ON(!list_empty(&thread->waiting_thread_node)); binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + thread->process_todo = true; } -- GitLab From 4ab56381ac2621594270f165c211f0954af3ea2e Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Mon, 8 Jan 2024 17:18:33 +0530 Subject: [PATCH 0162/2290] misc: fastrpc: Mark all sessions as invalid in cb_remove commit a4e61de63e34860c36a71d1a364edba16fb6203b upstream. In remoteproc shutdown sequence, rpmsg_remove will get called which would depopulate all the child nodes that have been created during rpmsg_probe. This would result in cb_remove call for all the context banks for the remoteproc. In cb_remove function, session 0 is getting skipped which is not correct as session 0 will never become available again. Add changes to mark session 0 also as invalid. Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Cc: stable Signed-off-by: Ekansh Gupta Link: https://lore.kernel.org/r/20240108114833.20480-1-quic_ekangupt@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index cc57cc8204328..69cc24962706c 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1990,7 +1990,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev) int i; spin_lock_irqsave(&cctx->lock, flags); - for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) { if (cctx->session[i].sid == sess->sid) { cctx->session[i].valid = false; cctx->sesscount--; -- GitLab From 185eab30486ba3e7bf8b9c2e049c79a06ffd2bc1 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:33 +0800 Subject: [PATCH 0163/2290] ext4: fix double-free of blocks due to wrong extents moved_len commit 55583e899a5357308274601364741a83e78d6ac4 upstream. In ext4_move_extents(), moved_len is only updated when all moves are successfully executed, and only discards orig_inode and donor_inode preallocations when moved_len is not zero. When the loop fails to exit after successfully moving some extents, moved_len is not updated and remains at 0, so it does not discard the preallocations. If the moved extents overlap with the preallocated extents, the overlapped extents are freed twice in ext4_mb_release_inode_pa() and ext4_process_freed_data() (as described in commit 94d7c16cbbbd ("ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT")), and bb_free is incremented twice. Hence when trim is executed, a zero-division bug is triggered in mb_update_avg_fragment_size() because bb_free is not zero and bb_fragments is zero. Therefore, update move_len after each extent move to avoid the issue. Reported-by: Wei Chen Reported-by: xingwei lee Closes: https://lore.kernel.org/r/CAO4mrferzqBUnCag8R3m2zf897ts9UEuhjFQGPtODT92rYyR2Q@mail.gmail.com Fixes: fcf6b1b729bc ("ext4: refactor ext4_move_extents code base") CC: # 3.18 Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/move_extent.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index dedc9d445f243..8e3ff150bc36b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -621,6 +621,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -675,7 +676,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -685,9 +686,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { -- GitLab From ac894a1e19b9ba2386dca85548a5c4fc106b9392 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:36 +0800 Subject: [PATCH 0164/2290] ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks() commit 2331fd4a49864e1571b4f50aa3aa1536ed6220d0 upstream. After updating bb_free in mb_free_blocks, it is possible to return without updating bb_fragments because the block being freed is found to have already been freed, which leads to inconsistency between bb_free and bb_fragments. Since the group may be unlocked in ext4_grp_locked_error(), this can lead to problems such as dividing by zero when calculating the average fragment length. Hence move the update of bb_free to after the block double-free check guarantees that the corresponding statistics are updated only after the core block bitmap is modified. Fixes: eabe0444df90 ("ext4: speed-up releasing blocks on commit") CC: # 3.10 Suggested-by: Jan Kara Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 40903c172a34f..1a310ee7d9e55 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1785,11 +1785,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1803,23 +1798,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } else { - mb_regenerate_buddy(e4b); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1844,9 +1847,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } -- GitLab From 65bf19f55a87a13417b859965926f54ae16117b3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 9 Feb 2024 06:36:22 -0500 Subject: [PATCH 0165/2290] tracing: Fix wasted memory in saved_cmdlines logic commit 44dc5c41b5b1267d4dd037d26afc0c4d3a568acb upstream. While looking at improving the saved_cmdlines cache I found a huge amount of wasted memory that should be used for the cmdlines. The tracing data saves pids during the trace. At sched switch, if a trace occurred, it will save the comm of the task that did the trace. This is saved in a "cache" that maps pids to comms and exposed to user space via the /sys/kernel/tracing/saved_cmdlines file. Currently it only caches by default 128 comms. The structure that uses this creates an array to store the pids using PID_MAX_DEFAULT (which is usually set to 32768). This causes the structure to be of the size of 131104 bytes on 64 bit machines. In hex: 131104 = 0x20020, and since the kernel allocates generic memory in powers of two, the kernel would allocate 0x40000 or 262144 bytes to store this structure. That leaves 131040 bytes of wasted space. Worse, the structure points to an allocated array to store the comm names, which is 16 bytes times the amount of names to save (currently 128), which is 2048 bytes. Instead of allocating a separate array, make the structure end with a variable length string and use the extra space for that. This is similar to a recommendation that Linus had made about eventfs_inode names: https://lore.kernel.org/all/20240130190355.11486-5-torvalds@linux-foundation.org/ Instead of allocating a separate string array to hold the saved comms, have the structure end with: char saved_cmdlines[]; and round up to the next power of two over sizeof(struct saved_cmdline_buffers) + num_cmdlines * TASK_COMM_LEN It will use this extra space for the saved_cmdline portion. Now, instead of saving only 128 comms by default, by using this wasted space at the end of the structure it can save over 8000 comms and even saves space by removing the need for allocating the other array. Link: https://lore.kernel.org/linux-trace-kernel/20240209063622.1f7b6d5f@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Cc: Sven Schnelle Cc: Mete Durlu Fixes: 939c7a4f04fcd ("tracing: Introduce saved_cmdlines_size file") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 75 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2b3c4cd8382b3..fa6c193e22f02 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2249,7 +2249,7 @@ struct saved_cmdlines_buffer { unsigned *map_cmdline_to_pid; unsigned cmdline_num; int cmdline_idx; - char *saved_cmdlines; + char saved_cmdlines[]; }; static struct saved_cmdlines_buffer *savedcmd; @@ -2263,47 +2263,58 @@ static inline void set_cmdline(int idx, const char *cmdline) strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) { + int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); + + kfree(s->map_cmdline_to_pid); + free_pages((unsigned long)s, order); +} + +static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) +{ + struct saved_cmdlines_buffer *s; + struct page *page; + int orig_size, size; + int order; + + /* Figure out how much is needed to hold the given number of cmdlines */ + orig_size = sizeof(*s) + val * TASK_COMM_LEN; + order = get_order(orig_size); + size = 1 << (order + PAGE_SHIFT); + page = alloc_pages(GFP_KERNEL, order); + if (!page) + return NULL; + + s = page_address(page); + memset(s, 0, sizeof(*s)); + + /* Round up to actual allocation */ + val = (size - sizeof(*s)) / TASK_COMM_LEN; + s->cmdline_num = val; + s->map_cmdline_to_pid = kmalloc_array(val, sizeof(*s->map_cmdline_to_pid), GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; + if (!s->map_cmdline_to_pid) { + free_saved_cmdlines_buffer(s); + return NULL; } s->cmdline_idx = 0; - s->cmdline_num = val; memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - return 0; + return s; } static int trace_create_savedcmd(void) { - int ret; - - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; + savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } - - return 0; + return savedcmd ? 0 : -ENOMEM; } int is_tracing_stopped(void) @@ -5972,26 +5983,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = allocate_cmdlines_buffer(val); if (!s) return -ENOMEM; - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; -- GitLab From c794117a33699bc2f14748cb4fa088b71804d0d8 Mon Sep 17 00:00:00 2001 From: David Schiller Date: Mon, 22 Jan 2024 14:49:17 +0100 Subject: [PATCH 0166/2290] staging: iio: ad5933: fix type mismatch regression commit 6db053cd949fcd6254cea9f2cd5d39f7bd64379c upstream. Commit 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") fixed a compiler warning, but introduced a bug that resulted in one of the two 16 bit IIO channels always being zero (when both are enabled). This is because int is 32 bits wide on most architectures and in the case of a little-endian machine the two most significant bytes would occupy the buffer for the second channel as 'val' is being passed as a void pointer to 'iio_push_to_buffers()'. Fix by defining 'val' as u16. Tested working on ARM64. Fixes: 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") Signed-off-by: David Schiller Link: https://lore.kernel.org/r/20240122134916.2137957-1-david.schiller@jku.at Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/impedance-analyzer/ad5933.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index f177b20f0f2d9..ceba632138940 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work) struct ad5933_state, work.work); struct iio_dev *indio_dev = i2c_get_clientdata(st->client); __be16 buf[2]; - int val[2]; + u16 val[2]; unsigned char status; int ret; -- GitLab From 176256ff8abff29335ecff905a09fb49e8dcf513 Mon Sep 17 00:00:00 2001 From: "zhili.liu" Date: Tue, 2 Jan 2024 09:07:11 +0800 Subject: [PATCH 0167/2290] iio: magnetometer: rm3100: add boundary check for the value read from RM3100_REG_TMRC commit 792595bab4925aa06532a14dd256db523eb4fa5e upstream. Recently, we encounter kernel crash in function rm3100_common_probe caused by out of bound access of array rm3100_samp_rates (because of underlying hardware failures). Add boundary check to prevent out of bound access. Fixes: 121354b2eceb ("iio: magnetometer: Add driver support for PNI RM3100") Suggested-by: Zhouyi Zhou Signed-off-by: zhili.liu Link: https://lore.kernel.org/r/1704157631-3814-1-git-send-email-zhouzhouyi@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/rm3100-core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 69938204456f8..42b70cd42b393 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) struct rm3100_data *data; unsigned int tmp; int ret; + int samp_rate_index; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp); if (ret < 0) return ret; + + samp_rate_index = tmp - RM3100_TMRC_OFFSET; + if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) { + dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n"); + return -EINVAL; + } /* Initializing max wait time, which is double conversion time. */ - data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2] - * 2; + data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2; /* Cycle count values may not be what we want. */ if ((tmp - RM3100_TMRC_OFFSET) == 0) -- GitLab From 359f220d0e753bba840eac19ffedcdc816b532f2 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 8 Dec 2023 15:31:19 +0800 Subject: [PATCH 0168/2290] iio: core: fix memleak in iio_device_register_sysfs commit 95a0d596bbd0552a78e13ced43f2be1038883c81 upstream. When iio_device_register_sysfs_group() fails, we should free iio_dev_opaque->chan_attr_group.attrs to prevent potential memleak. Fixes: 32f171724e5c ("iio: core: rework iio device group creation") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231208073119.29283-1-dinghao.liu@zju.edu.cn Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index c9614982cb671..a2f8278f00856 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1601,10 +1601,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) - goto error_clear_attrs; + goto error_free_chan_attrs; return 0; +error_free_chan_attrs: + kfree(iio_dev_opaque->chan_attr_group.attrs); + iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); -- GitLab From 4f10423c0e34d7eaed61478c56520017d5eddf3e Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 31 Jan 2024 10:16:47 +0100 Subject: [PATCH 0169/2290] iio: commom: st_sensors: ensure proper DMA alignment commit 862cf85fef85becc55a173387527adb4f076fab0 upstream. Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for st_sensors common buffer. While at it, moved the odr_lock before buffer_data as we definitely don't want any other data to share a cacheline with the buffer. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable") Signed-off-by: Nuno Sa Cc: Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/common/st_sensors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index db4a1b260348c..c34e648f07e28 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -261,9 +261,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER -- GitLab From 77ba1a86ef2208ddfe3b9efc98e6006520cd8ab2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 31 Jan 2024 16:52:46 -0600 Subject: [PATCH 0170/2290] iio: accel: bma400: Fix a compilation problem commit 4cb81840d8f29b66d9d05c6d7f360c9560f7e2f4 upstream. The kernel fails when compiling without `CONFIG_REGMAP_I2C` but with `CONFIG_BMA400`. ``` ld: drivers/iio/accel/bma400_i2c.o: in function `bma400_i2c_probe': bma400_i2c.c:(.text+0x23): undefined reference to `__devm_regmap_init_i2c' ``` Link: https://download.01.org/0day-ci/archive/20240131/202401311634.FE5CBVwe-lkp@intel.com/config Fixes: 465c811f1f20 ("iio: accel: Add driver for the BMA400") Fixes: 9bea10642396 ("iio: accel: bma400: add support for bma400 spi") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240131225246.14169-1-mario.limonciello@amd.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index ffac66db7ac92..1f34747a68bfe 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -219,10 +219,12 @@ config BMA400 config BMA400_I2C tristate + select REGMAP_I2C depends on BMA400 config BMA400_SPI tristate + select REGMAP_SPI depends on BMA400 config BMC150_ACCEL -- GitLab From 18cbe28671e2eacb19ff107ef67c4f74de606528 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 13:41:03 +0100 Subject: [PATCH 0171/2290] iio: adc: ad_sigma_delta: ensure proper DMA alignment commit 59598510be1d49e1cff7fd7593293bb8e1b2398b upstream. Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db39261592cf@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/adc/ad_sigma_delta.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714c..719cf9cc6e1ac 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; -- GitLab From 9e105dd8c070a58c080381491452f038e7b78039 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 14:10:49 +0100 Subject: [PATCH 0172/2290] iio: imu: adis: ensure proper DMA alignment commit 8e98b87f515d8c4bae521048a037b2cc431c3fd5 upstream. Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/imu/adis.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index bcbefb7574751..af083aa0c4317 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include #include +#include #include #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; -- GitLab From b79e15569d1a65f187344f97c7617e08fbccdf62 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 10 Jan 2024 10:56:11 -0800 Subject: [PATCH 0173/2290] iio: imu: bno055: serdev requires REGMAP commit 35ec2d03b282a939949090bd8c39eb37a5856721 upstream. There are a ton of build errors when REGMAP is not set, so select REGMAP to fix all of them. Examples (not all of them): ../drivers/iio/imu/bno055/bno055_ser_core.c:495:15: error: variable 'bno055_ser_regmap_bus' has initializer but incomplete type 495 | static struct regmap_bus bno055_ser_regmap_bus = { ../drivers/iio/imu/bno055/bno055_ser_core.c:496:10: error: 'struct regmap_bus' has no member named 'write' 496 | .write = bno055_ser_write_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c:497:10: error: 'struct regmap_bus' has no member named 'read' 497 | .read = bno055_ser_read_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c: In function 'bno055_ser_probe': ../drivers/iio/imu/bno055/bno055_ser_core.c:532:18: error: implicit declaration of function 'devm_regmap_init'; did you mean 'vmem_map_init'? [-Werror=implicit-function-declaration] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c:532:16: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c: At top level: ../drivers/iio/imu/bno055/bno055_ser_core.c:495:26: error: storage size of 'bno055_ser_regmap_bus' isn't known 495 | static struct regmap_bus bno055_ser_regmap_bus = { Fixes: 2eef5a9cc643 ("iio: imu: add BNO055 serdev driver") Signed-off-by: Randy Dunlap Cc: Andrea Merello Cc: Jonathan Cameron Cc: Lars-Peter Clausen Cc: linux-iio@vger.kernel.org Cc: Link: https://lore.kernel.org/r/20240110185611.19723-1-rdunlap@infradead.org Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/bno055/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index 83e53acfbe880..c7f5866a177d9 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -8,6 +8,7 @@ config BOSCH_BNO055 config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" depends on SERIAL_DEV_BUS + select REGMAP select BOSCH_BNO055 help Enable this to support Bosch BNO055 IMUs attached via UART. -- GitLab From 9f6087851ec6dce5b15f694aeaf3e8ec8243224e Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Apr 2023 10:50:32 +0200 Subject: [PATCH 0174/2290] media: rc: bpf attach/detach requires write permission commit 6a9d552483d50953320b9d3b57abdee8d436f23f upstream. Note that bpf attach/detach also requires CAP_NET_ADMIN. Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/bpf-lirc.c | 6 +++--- drivers/media/rc/lirc_dev.c | 5 ++++- drivers/media/rc/rc-core-priv.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e810..52d82cbe7685f 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 184e0b35744f3..adb8c794a2d7b 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1af7fc..7df949fc65e2b 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} -- GitLab From 7505a0ce08584b12d5c7ca4559725424972d803c Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 5 Feb 2024 14:19:16 +0300 Subject: [PATCH 0175/2290] ksmbd: free aux buffer if ksmbd_iov_pin_rsp_read fails commit 108a020c64434fed4b69762879d78cd24088b4c7 upstream. ksmbd_iov_pin_rsp_read() doesn't free the provided aux buffer if it fails. Seems to be the caller's responsibility to clear the buffer in error case. Found by Linux Verification Center (linuxtesting.org). Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 4cfa45c2727ea..66d25d0e34d8b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6171,8 +6171,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, @@ -6382,8 +6384,10 @@ int smb2_read(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } ksmbd_fd_put(work, fp); return 0; -- GitLab From 01e9f82058e208747323513eef3df738051eff0e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Mar 2023 17:40:32 +0800 Subject: [PATCH 0176/2290] xfrm: Remove inner/outer modes from output path commit f4796398f21b9844017a2dac883b1dd6ad6edd60 upstream. The inner/outer modes were added to abstract out common code that were once duplicated between IPv4 and IPv6. As time went on the abstractions have been removed and we are now left with empty shells that only contain duplicate information. These can be removed one-by-one as the same information is already present elsewhere in the xfrm_state object. Just like the input-side, removing this from the output code makes it possible to use transport-mode SAs underneath an inter-family tunnel mode SA. Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Cc: Sri Sakthi Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_output.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9a5e79a38c679..07a7ee43b8ae2 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -414,7 +414,7 @@ static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; skb->protocol = htons(ETH_P_IP); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm4_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -437,7 +437,7 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) skb->ignore_df = 1; skb->protocol = htons(ETH_P_IPV6); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm6_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -453,22 +453,22 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) { - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_prepare_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_prepare_output(x, skb); break; case XFRM_MODE_TRANSPORT: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_output(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_ro_output(x, skb); WARN_ON_ONCE(1); break; @@ -866,21 +866,10 @@ static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode; - - if (x->sel.family == AF_UNSPEC) - inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb_dst(skb)->ops->family)); - else - inner_mode = &x->inner_mode; - - if (inner_mode == NULL) - return -EAFNOSUPPORT; - - switch (inner_mode->family) { - case AF_INET: + switch (skb->protocol) { + case htons(ETH_P_IP): return xfrm4_extract_output(x, skb); - case AF_INET6: + case htons(ETH_P_IPV6): return xfrm6_extract_output(x, skb); } -- GitLab From ba5f95788345b2740de6db12225354781bf8407e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Mar 2023 17:26:05 +0800 Subject: [PATCH 0177/2290] xfrm: Remove inner/outer modes from input path commit 5f24f41e8ea62a6a9095f9bbafb8b3aebe265c68 upstream. The inner/outer modes were added to abstract out common code that were once duplicated between IPv4 and IPv6. As time went on the abstractions have been removed and we are now left with empty shells that only contain duplicate information. These can be removed one-by-one as the same information is already present elsewhere in the xfrm_state object. Removing them from the input path actually allows certain valid combinations that are currently disallowed. In particular, when a transport mode SA sits beneath a tunnel mode SA that changes address families, at present the transport mode SA cannot have AF_UNSPEC as its selector because it will be erroneously be treated as inter-family itself even though it simply sits beneath one. This is a serious problem because you can't set the selector to non-AF_UNSPEC either as that will cause the selector match to fail as we always match selectors to the inner-most traffic. Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Cc: Sri Sakthi Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 66 +++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ac1a645afa8df..deda4955c0466 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -232,9 +232,6 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -270,8 +267,6 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -332,22 +327,26 @@ out: */ static int xfrm_inner_mode_encap_remove(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: + case IPPROTO_BEETPH: return xfrm4_remove_beet_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_beet_encap(x, skb); + } break; case XFRM_MODE_TUNNEL: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: return xfrm4_remove_tunnel_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_tunnel_encap(x, skb); break; + } } WARN_ON_ONCE(1); @@ -356,9 +355,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode = &x->inner_mode; - - switch (x->outer_mode.family) { + switch (x->props.family) { case AF_INET: xfrm4_extract_header(skb); break; @@ -370,17 +367,12 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (!inner_mode) - return -EAFNOSUPPORT; - } - - switch (inner_mode->family) { - case AF_INET: + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: + case IPPROTO_BEETPH: skb->protocol = htons(ETH_P_IP); break; - case AF_INET6: + case IPPROTO_IPV6: skb->protocol = htons(ETH_P_IPV6); break; default: @@ -388,7 +380,7 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) break; } - return xfrm_inner_mode_encap_remove(x, inner_mode, skb); + return xfrm_inner_mode_encap_remove(x, skb); } /* Remove encapsulation header. @@ -434,17 +426,16 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } static int xfrm_inner_mode_input(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: return xfrm_prepare_input(x, skb); case XFRM_MODE_TRANSPORT: - if (inner_mode->family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_input(x, skb); - if (inner_mode->family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_input(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: @@ -462,7 +453,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); - const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; @@ -492,7 +482,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - family = x->outer_mode.family; + family = x->props.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -676,17 +666,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = &x->inner_mode; - - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - } - - if (xfrm_inner_mode_input(x, inner_mode, skb)) { + if (xfrm_inner_mode_input(x, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -701,7 +681,7 @@ resume: * transport mode so the outer address is identical. */ daddr = &x->id.daddr; - family = x->outer_mode.family; + family = x->props.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -732,7 +712,7 @@ resume: err = -EAFNOSUPPORT; rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + afinfo = xfrm_state_afinfo_get_rcu(x->props.family); if (likely(afinfo)) err = afinfo->transport_finish(skb, xfrm_gro || async); rcu_read_unlock(); -- GitLab From fc811d88fb4e4871f6814e3cb3e95858357280f5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Feb 2024 09:23:40 -0800 Subject: [PATCH 0178/2290] drm/msm: Wire up tlb ops commit 8c7bfd8262319fd3f127a5380f593ea76f1b88a2 upstream. The brute force iommu_flush_iotlb_all() was good enough for unmap, but in some cases a map operation could require removing a table pte entry to replace with a block entry. This also requires tlb invalidation. Missing this was resulting an obscure iova fault on what should be a valid buffer address. Thanks to Robin Murphy for helping me understand the cause of the fault. Cc: Robin Murphy Cc: stable@vger.kernel.org Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/578117/ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_iommu.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d12ba47b37c4f..0de3612135e96 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -194,11 +196,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -206,7 +230,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -282,6 +306,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; -- GitLab From 8c22b23a2778c9ab47f030b5eacf8d8c156115a7 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 21 Aug 2023 16:02:01 -0400 Subject: [PATCH 0179/2290] drm/prime: Support page array >= 4GB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b671cd3d456315f63171a670769356a196cf7fd0 upstream. Without unsigned long typecast, the size is passed in as zero if page array size >= 4GB, nr_pages >= 0x100000, then sg list converted will have the first and the last chunk lost. Signed-off-by: Philip Yang Acked-by: Felix Kuehling Reviewed-by: Christian König CC: stable@vger.kernel.org Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230821200201.24685-1-Philip.Yang@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_prime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index eb09e86044c6d..68a6d4b0ead75 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -828,7 +828,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); -- GitLab From e70123fdbe82e7e4478d1c22eb5e3a025b921917 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 5 Feb 2024 14:54:05 -0700 Subject: [PATCH 0180/2290] drm/amd/display: Increase frame-larger-than for all display_mode_vba files commit e63e35f0164c43fbc1adb481d6604f253b9f9667 upstream. After a recent change in LLVM, allmodconfig (which has CONFIG_KCSAN=y and CONFIG_WERROR=y enabled) has a few new instances of -Wframe-larger-than for the mode support and system configuration functions: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20v2.c:3393:6: error: stack frame size (2144) exceeds limit (2048) in 'dml20v2_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3393 | void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:3520:6: error: stack frame size (2192) exceeds limit (2048) in 'dml21_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3520 | void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20.c:3286:6: error: stack frame size (2128) exceeds limit (2048) in 'dml20_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3286 | void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. Without the sanitizers enabled, there are no warnings. This was the catalyst for commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") and that same change was made to dml in commit 5b750b22530f ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml") but the frame_warn_flag variable was not applied to all files. Do so now to clear up the warnings and make all these files consistent. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issue/1990 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ca7d240006213..6fdf87a6e240f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -60,11 +60,11 @@ ifdef CONFIG_DRM_AMD_DC_DCN CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) -- GitLab From 3ca5a3cdc0c105d976adbb8dc8469026bd07b097 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Tue, 30 Jan 2024 15:34:08 +0800 Subject: [PATCH 0181/2290] drm/amd/display: Preserve original aspect ratio in create stream commit deb110292180cd501f6fde2a0178d65fcbcabb0c upstream. [Why] The original picture aspect ratio in mode struct may have chance be overwritten with wrong aspect ratio data in create_stream_for_sink(). It will create a different VIC output and cause HDMI compliance test failed. [How] Preserve the original picture aspect ratio data during create the stream. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f02e509d5facb..a826c92933199 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6001,7 +6001,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); -- GitLab From 48a8ccccffbae10c91d31fc872db5c31aba07518 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Tue, 30 Jan 2024 23:35:51 -0800 Subject: [PATCH 0182/2290] hv_netvsc: Fix race condition between netvsc_probe and netvsc_remove commit e0526ec5360a48ad3ab2e26e802b0532302a7e11 upstream. In commit ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel"), napi_disable was getting called for all channels, including all subchannels without confirming if they are enabled or not. This caused hv_netvsc getting hung at napi_disable, when netvsc_probe() has finished running but nvdev->subchan_work has not started yet. netvsc_subchan_work() -> rndis_set_subchannel() has not created the sub-channels and because of that netvsc_sc_open() is not running. netvsc_remove() calls cancel_work_sync(&nvdev->subchan_work), for which netvsc_subchan_work did not run. netif_napi_add() sets the bit NAPI_STATE_SCHED because it ensures NAPI cannot be scheduled. Then netvsc_sc_open() -> napi_enable will clear the NAPIF_STATE_SCHED bit, so it can be scheduled. napi_disable() does the opposite. Now during netvsc_device_remove(), when napi_disable is called for those subchannels, napi_disable gets stuck on infinite msleep. This fix addresses this problem by ensuring that napi_disable() is not getting called for non-enabled NAPI struct. But netif_napi_del() is still necessary for these non-enabled NAPI struct for cleanup purpose. Call trace: [ 654.559417] task:modprobe state:D stack: 0 pid: 2321 ppid: 1091 flags:0x00004002 [ 654.568030] Call Trace: [ 654.571221] [ 654.573790] __schedule+0x2d6/0x960 [ 654.577733] schedule+0x69/0xf0 [ 654.581214] schedule_timeout+0x87/0x140 [ 654.585463] ? __bpf_trace_tick_stop+0x20/0x20 [ 654.590291] msleep+0x2d/0x40 [ 654.593625] napi_disable+0x2b/0x80 [ 654.597437] netvsc_device_remove+0x8a/0x1f0 [hv_netvsc] [ 654.603935] rndis_filter_device_remove+0x194/0x1c0 [hv_netvsc] [ 654.611101] ? do_wait_intr+0xb0/0xb0 [ 654.615753] netvsc_remove+0x7c/0x120 [hv_netvsc] [ 654.621675] vmbus_remove+0x27/0x40 [hv_vmbus] Cc: stable@vger.kernel.org Fixes: ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1706686551-28510-1-git-send-email-schakrabarti@linux.microsoft.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index da737d959e81c..3a834d4e1c842 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -740,7 +740,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } -- GitLab From ef3d50e884d4a765bbb8c73c29b21447196f1c7a Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 31 Jan 2024 14:09:55 +0000 Subject: [PATCH 0183/2290] ring-buffer: Clean ring_buffer_poll_wait() error return commit 66bbea9ed6446b8471d365a22734dc00556c4785 upstream. The return type for ring_buffer_poll_wait() is __poll_t. This is behind the scenes an unsigned where we can set event bits. In case of a non-allocated CPU, we do return instead -EINVAL (0xffffffea). Lucky us, this ends up setting few error bits (EPOLLERR | EPOLLHUP | EPOLLNVAL), so user-space at least is aware something went wrong. Nonetheless, this is an incorrect code. Replace that -EINVAL with a proper EPOLLERR to clean that output. As this doesn't change the behaviour, there's no need to treat this change as a bug fix. Link: https://lore.kernel.org/linux-trace-kernel/20240131140955.3322792-1-vdonnefort@google.com Cc: stable@vger.kernel.org Fixes: 6721cb6002262 ("ring-buffer: Do not poll non allocated cpu buffers") Signed-off-by: Vincent Donnefort Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1285e7fb597ee..e019a9278794f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1095,7 +1095,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; -- GitLab From c96ce4903b624528f6a21fa0b5ebcbc4db7e7849 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:09 +0200 Subject: [PATCH 0184/2290] nfp: flower: fix hardware offload for the transfer layer port commit 3a007b8009b5f8af021021b7a590a6da0dc4c6e0 upstream. The nfp driver will merge the tp source port and tp destination port into one dword which the offset must be zero to do hardware offload. However, the mangle action for the tp source port and tp destination port is separated for tc ct action. Modify the mangle action for the FLOW_ACT_MANGLE_HDR_TYPE_TCP and FLOW_ACT_MANGLE_HDR_TYPE_UDP to satisfy the nfp driver offload check for the tp port. The mangle action provides a 4B value for source, and a 4B value for the destination, but only 2B of each contains the useful information. For offload the 2B of each is combined into a single 4B word. Since the incoming mask for the source is '0xFFFF' the shift-left will throw away the 0xFFFF part. When this gets combined together in the offload it will clear the destination field. Fix this by setting the lower bits back to 0xFFFF, effectively doing a rotate-left operation on the mask. Fixes: 5cee92c6f57a ("nfp: flower: support hw offload for ct nat action") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-3-louis.peens@corigine.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../ethernet/netronome/nfp/flower/conntrack.c | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index f7492be452aed..7af03b45555dd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1379,10 +1379,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: -- GitLab From 5d89c48337c78a4cbe15ae0d6aa7ca043dae884e Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:58 -0500 Subject: [PATCH 0185/2290] serial: max310x: set default value when reading clock ready bit commit 0419373333c2f2024966d36261fd82a453281e80 upstream. If regmap_read() returns a non-zero value, the 'val' variable can be left uninitialized. Clear it before calling regmap_read() to make sure we properly detect the clock ready bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 338cb19dec23c..abae6e245eb29 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -641,7 +641,7 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val; + unsigned int val = 0; msleep(10); regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); if (!(val & MAX310X_STS_CLKREADY_BIT)) { -- GitLab From 7971a029eb23976016d7a9cd94f54e0c35aa26fb Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:59 -0500 Subject: [PATCH 0186/2290] serial: max310x: improve crystal stable clock detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 93cd256ab224c2519e7c4e5f58bb4f1ac2bf0965 upstream. Some people are seeing a warning similar to this when using a crystal: max310x 11-006c: clock is not stable yet The datasheet doesn't mention the maximum time to wait for the clock to be stable when using a crystal, and it seems that the 10ms delay in the driver is not always sufficient. Jan Kundrát reported that it took three tries (each separated by 10ms) to get a stable clock. Modify behavior to check stable clock ready bit multiple times (20), and waiting 10ms between each try. Note: the first draft of the driver originally used a 50ms delay, without checking the clock stable bit. Then a loop with 1000 retries was implemented, each time reading the clock stable bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Link: https://lore.kernel.org/all/20240110174015.6f20195fde08e5c9e64e5675@hugovil.com/raw Link: https://github.com/boundarydevices/linux/commit/e5dfe3e4a751392515d78051973190301a37ca9a Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index abae6e245eb29..2ce4cf27580e9 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Crystal-related definitions */ +#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ + /* MAX3107 specific */ #define MAX3107_REV_ID (0xa0) @@ -641,12 +645,19 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val = 0; - msleep(10); - regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); - if (!(val & MAX310X_STS_CLKREADY_BIT)) { + bool stable = false; + unsigned int try = 0, val = 0; + + do { + msleep(MAX310X_XTAL_WAIT_DELAY_MS); + regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); + + if (val & MAX310X_STS_CLKREADY_BIT) + stable = true; + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) dev_warn(dev, "clock is not stable yet\n"); - } } return bestfreq; -- GitLab From 0046dd2e9ff6d5f12a273046880cad39313bfcf4 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:00 -0500 Subject: [PATCH 0187/2290] serial: max310x: fail probe if clock crystal is unstable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8afa6c6decea37e7cb473d2c60473f37f46cea35 upstream. A stable clock is really required in order to use this UART, so log an error message and bail out if the chip reports that the clock is not stable. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 2ce4cf27580e9..e91a2a59d9f00 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -587,7 +587,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -657,7 +657,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); if (!stable) - dev_warn(dev, "clock is not stable yet\n"); + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1285,7 +1286,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; for (i = 0; i < devtype->nr; i++) @@ -1363,6 +1364,11 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { -- GitLab From 5e2f407646faa2aa45c853d66a07cf4c8afdb59c Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:01 -0500 Subject: [PATCH 0188/2290] serial: max310x: prevent infinite while() loop in port startup commit b35f8dbbce818b02c730dc85133dc7754266e084 upstream. If there is a problem after resetting a port, the do/while() loop that checks the default value of DIVLSB register may run forever and spam the I2C bus. Add a delay before each read of DIVLSB, and a maximum number of tries to prevent that situation from happening. Also fail probe if port reset is unsuccessful. Fixes: 10d8b34a4217 ("serial: max310x: Driver rework") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index e91a2a59d9f00..163a89f84c9c2 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + /* Crystal-related definitions */ #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ @@ -1349,6 +1353,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1357,8 +1364,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } -- GitLab From 76d3ad7d0264d8ffe500ca9fa7f8c4fec8f1f9bd Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Fri, 2 Feb 2024 21:13:16 +0530 Subject: [PATCH 0189/2290] powerpc/64: Set task pt_regs->link to the LR value on scv entry commit aad98efd0b121f63a2e1c221dcb4d4850128c697 upstream. Nysal reported that userspace backtraces are missing in offcputime bcc tool. As an example: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write - python (9107) 8 write - sudo (9105) 9 mmap - python (9107) 16 clock_nanosleep - multipathd (697) 3001604 The offcputime bcc tool attaches a bpf program to a kprobe on finish_task_switch(), which is usually hit on a syscall from userspace. With the switch to system call vectored, we started setting pt_regs->link to zero. This is because system call vectored behaves like a function call with LR pointing to the system call return address, and with no modification to SRR0/SRR1. The LR value does indicate our next instruction, so it is being saved as pt_regs->nip, and pt_regs->link is being set to zero. This is not a problem by itself, but BPF uses perf callchain infrastructure for capturing stack traces, and that stores LR as the second entry in the stack trace. perf has code to cope with the second entry being zero, and skips over it. However, generic userspace unwinders assume that a zero entry indicates end of the stack trace, resulting in a truncated userspace stack trace. Rather than fixing all userspace unwinders to ignore/skip past the second entry, store the real LR value in pt_regs->link so that there continues to be a valid, though duplicate entry in the stack trace. With this change: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write write [unknown] [unknown] [unknown] [unknown] [unknown] PyObject_VectorcallMethod [unknown] [unknown] PyObject_CallOneArg PyFile_WriteObject PyFile_WriteString [unknown] [unknown] PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 7 write write [unknown] sudo_ev_loop_v1 sudo_ev_dispatch_v1 [unknown] [unknown] [unknown] [unknown] __libc_start_main - sudo (1291) 7 syscall syscall bpf_open_perf_buffer_opts [unknown] [unknown] [unknown] [unknown] _PyObject_MakeTpCall PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 11 clock_nanosleep clock_nanosleep nanosleep sleep [unknown] [unknown] __clone - multipathd (698) 3001661 Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org Reported-by: "Nysal Jan K.A" Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20240202154316.395276-1-naveen@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/interrupt_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc8393..26c151e2a7942 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr -- GitLab From 42422f8f8c52af49a5caec4919584e2e8f87997b Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 7 Feb 2024 10:27:58 +0100 Subject: [PATCH 0190/2290] powerpc/cputable: Add missing PPC_FEATURE_BOOKE on PPC64 Book-E commit eb6d871f4ba49ac8d0537e051fe983a3a4027f61 upstream. Commit e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") moved the cpu_specs to separate header files. Previously PPC_FEATURE_BOOKE was enabled by CONFIG_PPC_BOOK3E_64. The definition in cpu_specs_e500mc.h for PPC64 no longer enables PPC_FEATURE_BOOKE. This breaks user space reading the ELF hwcaps and expect PPC_FEATURE_BOOKE. Debugging an application with gdb is no longer working on e5500/e6500 because the 64-bit detection relies on PPC_FEATURE_BOOKE for Book-E. Fixes: e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: David Engraf Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20240207092758.1058893-1-david.engraf@sysgo.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cpu_specs_e500mc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109f831..2ae8e9a7b461c 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) -- GitLab From c194adaa8a4c404ba5676ac90ff607b3d7b3120d Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Tue, 13 Feb 2024 10:56:35 +0530 Subject: [PATCH 0191/2290] powerpc/pseries: fix accuracy of stolen time commit cbecc9fcbbec60136b0180ba0609c829afed5c81 upstream. powerVM hypervisor updates the VPA fields with stolen time data. It currently reports enqueue_dispatch_tb and ready_enqueue_tb for this purpose. In linux these two fields are used to report the stolen time. The VPA fields are updated at the TB frequency. On powerPC its mostly set at 512Mhz. Hence this needs a conversion to ns when reporting it back as rest of the kernel timings are in ns. This conversion is already handled in tb_to_ns function. So use that function to report accurate stolen time. Observed this issue and used an Capped Shared Processor LPAR(SPLPAR) to simplify the experiments. In all these cases, 100% VP Load is run using stress-ng workload. Values of stolen time is in percentages as reported by mpstat. With the patch values are close to expected. 6.8.rc1 +Patch 12EC/12VP 0.0 0.0 12EC/24VP 25.7 50.2 12EC/36VP 37.3 69.2 12EC/48VP 38.5 78.3 Fixes: 0e8a63132800 ("powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Shrikanth Hegde Reviewed-by: Nicholas Piggin Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20240213052635.231597-1-sshegde@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 541199c6a587d..5186d65d772e2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -660,8 +660,12 @@ u64 pseries_paravirt_steal_clock(int cpu) { struct lppaca *lppaca = &lppaca_of(cpu); - return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + - be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); + /* + * VPA steal time counters are reported at TB frequency. Hence do a + * conversion to ns before returning + */ + return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb))); } #endif -- GitLab From 8b4025679e563b407d322fcd13eb287179e1fa32 Mon Sep 17 00:00:00 2001 From: Aleksander Mazur Date: Tue, 23 Jan 2024 14:43:00 +0100 Subject: [PATCH 0192/2290] x86/Kconfig: Transmeta Crusoe is CPU family 5, not 6 commit f6a1892585cd19e63c4ef2334e26cd536d5b678d upstream. The kernel built with MCRUSOE is unbootable on Transmeta Crusoe. It shows the following error message: This kernel requires an i686 CPU, but only detected an i586 CPU. Unable to boot - please use a kernel appropriate for your CPU. Remove MCRUSOE from the condition introduced in commit in Fixes, effectively changing X86_MINIMUM_CPU_FAMILY back to 5 on that machine, which matches the CPU family given by CPUID. [ bp: Massage commit message. ] Fixes: 25d76ac88821 ("x86/Kconfig: Explicitly enumerate i686-class CPUs in Kconfig") Signed-off-by: Aleksander Mazur Signed-off-by: Borislav Petkov (AMD) Acked-by: H. Peter Anvin Cc: Link: https://lore.kernel.org/r/20240123134309.1117782-1-deweloper@wp.pl Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d7..ce5ed2c2db0c9 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -375,7 +375,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) default "5" if X86_32 && X86_CMPXCHG64 default "4" -- GitLab From 627339cccdc9166792ecf96bc3c9f711a60ce996 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 29 Jan 2024 22:36:03 -0800 Subject: [PATCH 0193/2290] x86/fpu: Stop relying on userspace for info to fault in xsave buffer commit d877550eaf2dc9090d782864c96939397a3c6835 upstream. Before this change, the expected size of the user space buffer was taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed from user-space, so it is possible construct a sigreturn frame where: * fx_sw->xstate_size is smaller than the size required by valid bits in fx_sw->xfeatures. * user-space unmaps parts of the sigrame fpu buffer so that not all of the buffer required by xrstor is accessible. In this case, xrstor tries to restore and accesses the unmapped area which results in a fault. But fault_in_readable succeeds because buf + fx_sw->xstate_size is within the still mapped area, so it goes back and tries xrstor again. It will spin in this loop forever. Instead, fault in the maximum size which can be touched by XRSTOR (taken from fpstate->user_size). [ dhansen: tweak subject / changelog ] Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path") Reported-by: Konstantin Bogomolov Suggested-by: Thomas Gleixner Signed-off-by: Andrei Vagin Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 558076dbde5bf..247f2225aa9f3 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; + /* Restore enabled features only. */ + xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ @@ -309,7 +310,7 @@ retry: if (ret != X86_TRAP_PF) return false; - if (!fault_in_readable(buf, size)) + if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } @@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; - unsigned int state_size; u64 user_xfeatures = 0; if (use_xsave()) { @@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return false; fx_only = !fx_sw_user.magic1; - state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* -- GitLab From 3863ca052216d893d21ac42b75c2db8a0bd49689 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Tue, 23 Jan 2024 22:12:20 +0000 Subject: [PATCH 0194/2290] KVM: x86/pmu: Fix type length error when reading pmu->fixed_ctr_ctrl commit 05519c86d6997cfb9bb6c82ce1595d1015b718dc upstream. Use a u64 instead of a u8 when taking a snapshot of pmu->fixed_ctr_ctrl when reprogramming fixed counters, as truncating the value results in KVM thinking fixed counter 2 is already disabled (the bug also affects fixed counters 3+, but KVM doesn't yet support those). As a result, if the guest disables fixed counter 2, KVM will get a false negative and fail to reprogram/disable emulation of the counter, which can leads to incorrect counts and spurious PMIs in the guest. Fixes: 76d287b2342e ("KVM: x86/pmu: Drop "u8 ctrl, int idx" for reprogram_fixed_counter()") Cc: stable@vger.kernel.org Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20240123221220.3911317-1-mizhang@google.com [sean: rewrite changelog to call out the effects of the bug] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9a75a0d5deae1..220cdbe1e286e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -38,7 +38,7 @@ static int fixed_pmc_events[] = {1, 0, 7}; static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { struct kvm_pmc *pmc; - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; + u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; int i; pmu->fixed_ctr_ctrl = data; -- GitLab From aedcefae6c18f6d7b2c75c1a40d5731004929621 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 26 Jan 2024 10:48:41 -0600 Subject: [PATCH 0195/2290] x86/mm/ident_map: Use gbpages only where full GB page should be mapped. commit d794734c9bbfe22f86686dc2909c25f5ffe1a572 upstream. When ident_pud_init() uses only gbpages to create identity maps, large ranges of addresses not actually requested can be included in the resulting table; a 4K request will map a full GB. On UV systems, this ends up including regions that will cause hardware to halt the system if accessed (these are marked "reserved" by BIOS). Even processor speculation into these regions is enough to trigger the system halt. Only use gbpages when map creation requests include the full GB page of space. Fall back to using smaller 2M pages when only portions of a GB page are included in the request. No attempt is made to coalesce mapping requests. If a request requires a map entry at the 2M (pmd) level, subsequent mapping requests within the same 1G region will also be at the pmd level, even if adjacent or overlapping such requests could have been combined to map a full gbpage. Existing usage starts with larger regions and then adds smaller regions, so this should not have any great consequence. [ dhansen: fix up comment formatting, simplifty changelog ] Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240126164841.170866-1-steve.wahl%40hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/ident_map.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f4a72..f50cc210a9818 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - if (info->direct_gbpages) { - pud_t pudval; + /* if this is already a gbpage, this portion is already mapped */ + if (pud_large(*pud)) + continue; + + /* Is using a gbpage allowed? */ + use_gbpage = info->direct_gbpages; - if (pud_present(*pud)) - continue; + /* Don't use gbpage if it maps more than the requested region. */ + /* at the begining: */ + use_gbpage &= ((addr & ~PUD_MASK) == 0); + /* ... or at the end: */ + use_gbpage &= ((next & ~PUD_MASK) == 0); + + /* Never overwrite existing mappings */ + use_gbpage &= !pud_present(*pud); + + if (use_gbpage) { + pud_t pudval; - addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; -- GitLab From eae748df18ed25fc155964a51a1533eea29baae7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Feb 2024 08:23:05 -0700 Subject: [PATCH 0196/2290] io_uring/net: fix multishot accept overflow handling commit a37ee9e117ef73bbc2f5c0b31911afd52d229861 upstream. If we hit CQ ring overflow when attempting to post a multishot accept completion, we don't properly save the result or return code. This results in losing the accepted fd value. Instead, we return the result from the poll operation that triggered the accept retry. This is generally POLLIN|POLLPRI|POLLRDNORM|POLLRDBAND which is 0xc3, or 195, which looks like a valid file descriptor, but it really has no connection to that. Handle this like we do for other multishot completions - assign the result, and return IOU_STOP_MULTISHOT to cancel any further completions from this request when overflow is hit. This preserves the result, as we should, and tells the application that the request needs to be re-armed. Cc: stable@vger.kernel.org Fixes: 515e26961295 ("io_uring: revert "io_uring fix multishot accept ordering"") Link: https://github.com/axboe/liburing/issues/1062 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 618ab186fe036..c062ce66af12c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1326,7 +1326,7 @@ retry: * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) - ret = IOU_ISSUE_SKIP_COMPLETE; + return IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) @@ -1350,7 +1350,8 @@ retry: if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) goto retry; - return -ECANCELED; + io_req_set_res(req, ret, 0); + return IOU_STOP_MULTISHOT; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- GitLab From a943c7fbdfebb35a0f58fff94c018ee0e7b3482c Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 6 Feb 2024 09:39:12 +0100 Subject: [PATCH 0197/2290] mmc: slot-gpio: Allow non-sleeping GPIO ro commit cc9432c4fb159a3913e0ce3173b8218cd5bad2e0 upstream. This change uses the appropriate _cansleep or non-sleeping API for reading GPIO read-only state. This allows users with GPIOs that never sleepbeing called in atomic context. Implement the same mechanism as in commit 52af318c93e97 ("mmc: Allow non-sleeping GPIO cd"). Signed-off-by: Alexander Stein Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240206083912.2543142-1-alexander.stein@ew.tq-group.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/slot-gpio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index dd2a4b6ab6adb..e3c69c6b85a6c 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -62,11 +62,15 @@ int mmc_gpio_alloc(struct mmc_host *host) int mmc_gpio_get_ro(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; + int cansleep; if (!ctx || !ctx->ro_gpio) return -ENOSYS; - return gpiod_get_value_cansleep(ctx->ro_gpio); + cansleep = gpiod_cansleep(ctx->ro_gpio); + return cansleep ? + gpiod_get_value_cansleep(ctx->ro_gpio) : + gpiod_get_value(ctx->ro_gpio); } EXPORT_SYMBOL(mmc_gpio_get_ro); -- GitLab From 53e8abc14e579c87c91d10db88d9c34b0d295361 Mon Sep 17 00:00:00 2001 From: Eniac Zhang Date: Thu, 15 Feb 2024 15:49:22 +0000 Subject: [PATCH 0198/2290] ALSA: hda/realtek: fix mute/micmute LED For HP mt645 commit 32f03f4002c5df837fb920eb23fcd2f4af9b0b23 upstream. The HP mt645 G7 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang Signed-off-by: Alexandru Gagniuc Cc: Link: https://lore.kernel.org/r/20240215154922.778394-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 153a9605d3a97..92a656fb53212 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9690,6 +9690,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -9697,6 +9698,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), -- GitLab From 35076e3fb4b8cbe9f24e1b33c4985cbd5ac59c16 Mon Sep 17 00:00:00 2001 From: bo liu Date: Mon, 5 Feb 2024 09:38:02 +0800 Subject: [PATCH 0199/2290] ALSA: hda/conexant: Add quirk for SWS JS201D commit 4639c5021029d49fd2f97fa8d74731f167f98919 upstream. The SWS JS201D need a different pinconfig from windows driver. Add a quirk to use a specific pinconfig to SWS JS201D. Signed-off-by: bo liu Cc: Link: https://lore.kernel.org/r/20240205013802.51907-1-bo.liu@senarytech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e8819e8a98763..e8209178d87bb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -344,6 +344,7 @@ enum { CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, + CXT_PINCFG_SWS_JS201D, }; /* for hda_fixup_thinkpad_acpi() */ @@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = { {} }; +/* SuoWoSi/South-holding JS201D with sn6140 */ +static const struct hda_pintbl cxt_pincfg_sws_js201d[] = { + { 0x16, 0x03211040 }, /* hp out */ + { 0x17, 0x91170110 }, /* SPK/Class_D */ + { 0x18, 0x95a70130 }, /* Internal mic */ + { 0x19, 0x03a11020 }, /* Headset Mic */ + { 0x1a, 0x40f001f0 }, /* Not used */ + { 0x21, 0x40f001f0 }, /* Not used */ + {} +}; + static const struct hda_fixup cxt_fixups[] = { [CXT_PINCFG_LENOVO_X200] = { .type = HDA_FIXUP_PINS, @@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = { .chained = true, .chain_id = CXT_FIXUP_HEADSET_MIC, }, + [CXT_PINCFG_SWS_JS201D] = { + .type = HDA_FIXUP_PINS, + .v.pins = cxt_pincfg_sws_js201d, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), @@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, {} }; -- GitLab From 9c9c68d64fd3284f7097ed6ae057c8441f39fcd3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 24 Jan 2024 21:19:36 +0900 Subject: [PATCH 0200/2290] nilfs2: fix data corruption in dsync block recovery for small block sizes commit 67b8bcbaed4777871bb0dcc888fb02a614a98ab1 upstream. The helper function nilfs_recovery_copy_block() of nilfs_recovery_dsync_blocks(), which recovers data from logs created by data sync writes during a mount after an unclean shutdown, incorrectly calculates the on-page offset when copying repair data to the file's page cache. In environments where the block size is smaller than the page size, this flaw can cause data corruption and leak uninitialized memory bytes during the recovery process. Fix these issues by correcting this byte offset calculation on the page. Link: https://lkml.kernel.org/r/20240124121936.10575-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/recovery.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 0955b657938ff..a9b8d77c8c1d5 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct page *page) { struct buffer_head *bh_org; + size_t from = pos & ~PAGE_MASK; void *kaddr; bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); @@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, return -EIO; kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); + memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); kunmap_atomic(kaddr); brelse(bh_org); return 0; @@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, page); if (unlikely(err)) goto failed_page; -- GitLab From 8494ba2c9ea00a54d5b50e69b22c55a8958bce32 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 31 Jan 2024 23:56:57 +0900 Subject: [PATCH 0201/2290] nilfs2: fix hang in nilfs_lookup_dirty_data_buffers() commit 38296afe3c6ee07319e01bb249aa4bb47c07b534 upstream. Syzbot reported a hang issue in migrate_pages_batch() called by mbind() and nilfs_lookup_dirty_data_buffers() called in the log writer of nilfs2. While migrate_pages_batch() locks a folio and waits for the writeback to complete, the log writer thread that should bring the writeback to completion picks up the folio being written back in nilfs_lookup_dirty_data_buffers() that it calls for subsequent log creation and was trying to lock the folio. Thus causing a deadlock. In the first place, it is unexpected that folios/pages in the middle of writeback will be updated and become dirty. Nilfs2 adds a checksum to verify the validity of the log being written and uses it for recovery at mount, so data changes during writeback are suppressed. Since this is broken, an unclean shutdown could potentially cause recovery to fail. Investigation revealed that the root cause is that the wait for writeback completion in nilfs_page_mkwrite() is conditional, and if the backing device does not require stable writes, data may be modified without waiting. Fix these issues by making nilfs_page_mkwrite() wait for writeback to finish regardless of the stable write requirement of the backing device. Link: https://lkml.kernel.org/r/20240131145657.4209-1-konishi.ryusuke@gmail.com Fixes: 1d1d1a767206 ("mm: only enforce stable page writes if the backing device requires it") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ee2ae68da3b22d04cd8d@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/00000000000047d819061004ad6c@google.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a265d391ffe92..822e8d95d31ef 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_for_stable_page(page); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); -- GitLab From 8731fe001a60581794ed9cf65da8cd304846a6fb Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 25 Jan 2024 17:12:53 -0600 Subject: [PATCH 0202/2290] crypto: ccp - Fix null pointer dereference in __sev_platform_shutdown_locked commit ccb88e9549e7cfd8bcd511c538f437e20026e983 upstream. The SEV platform device can be shutdown with a null psp_master, e.g., using DEBUG_TEST_DRIVER_REMOVE. Found using KASAN: [ 137.148210] ccp 0000:23:00.1: enabling device (0000 -> 0002) [ 137.162647] ccp 0000:23:00.1: no command queues available [ 137.170598] ccp 0000:23:00.1: sev enabled [ 137.174645] ccp 0000:23:00.1: psp enabled [ 137.178890] general protection fault, probably for non-canonical address 0xdffffc000000001e: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN NOPTI [ 137.182693] KASAN: null-ptr-deref in range [0x00000000000000f0-0x00000000000000f7] [ 137.182693] CPU: 93 PID: 1 Comm: swapper/0 Not tainted 6.8.0-rc1+ #311 [ 137.182693] RIP: 0010:__sev_platform_shutdown_locked+0x51/0x180 [ 137.182693] Code: 08 80 3c 08 00 0f 85 0e 01 00 00 48 8b 1d 67 b6 01 08 48 b8 00 00 00 00 00 fc ff df 48 8d bb f0 00 00 00 48 89 f9 48 c1 e9 03 <80> 3c 01 00 0f 85 fe 00 00 00 48 8b 9b f0 00 00 00 48 85 db 74 2c [ 137.182693] RSP: 0018:ffffc900000cf9b0 EFLAGS: 00010216 [ 137.182693] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 000000000000001e [ 137.182693] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 00000000000000f0 [ 137.182693] RBP: ffffc900000cf9c8 R08: 0000000000000000 R09: fffffbfff58f5a66 [ 137.182693] R10: ffffc900000cf9c8 R11: ffffffffac7ad32f R12: ffff8881e5052c28 [ 137.182693] R13: ffff8881e5052c28 R14: ffff8881758e43e8 R15: ffffffffac64abf8 [ 137.182693] FS: 0000000000000000(0000) GS:ffff889de7000000(0000) knlGS:0000000000000000 [ 137.182693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 137.182693] CR2: 0000000000000000 CR3: 0000001cf7c7e000 CR4: 0000000000350ef0 [ 137.182693] Call Trace: [ 137.182693] [ 137.182693] ? show_regs+0x6c/0x80 [ 137.182693] ? __die_body+0x24/0x70 [ 137.182693] ? die_addr+0x4b/0x80 [ 137.182693] ? exc_general_protection+0x126/0x230 [ 137.182693] ? asm_exc_general_protection+0x2b/0x30 [ 137.182693] ? __sev_platform_shutdown_locked+0x51/0x180 [ 137.182693] sev_firmware_shutdown.isra.0+0x1e/0x80 [ 137.182693] sev_dev_destroy+0x49/0x100 [ 137.182693] psp_dev_destroy+0x47/0xb0 [ 137.182693] sp_destroy+0xbb/0x240 [ 137.182693] sp_pci_remove+0x45/0x60 [ 137.182693] pci_device_remove+0xaa/0x1d0 [ 137.182693] device_remove+0xc7/0x170 [ 137.182693] really_probe+0x374/0xbe0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] __driver_probe_device+0x199/0x460 [ 137.182693] driver_probe_device+0x4e/0xd0 [ 137.182693] __driver_attach+0x191/0x3d0 [ 137.182693] ? __pfx___driver_attach+0x10/0x10 [ 137.182693] bus_for_each_dev+0x100/0x190 [ 137.182693] ? __pfx_bus_for_each_dev+0x10/0x10 [ 137.182693] ? __kasan_check_read+0x15/0x20 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? _raw_spin_unlock+0x27/0x50 [ 137.182693] driver_attach+0x41/0x60 [ 137.182693] bus_add_driver+0x2a8/0x580 [ 137.182693] driver_register+0x141/0x480 [ 137.182693] __pci_register_driver+0x1d6/0x2a0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? esrt_sysfs_init+0x1cd/0x5d0 [ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 [ 137.182693] sp_pci_init+0x22/0x30 [ 137.182693] sp_mod_init+0x14/0x30 [ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 [ 137.182693] do_one_initcall+0xd1/0x470 [ 137.182693] ? __pfx_do_one_initcall+0x10/0x10 [ 137.182693] ? parameq+0x80/0xf0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? __kmalloc+0x3b0/0x4e0 [ 137.182693] ? kernel_init_freeable+0x92d/0x1050 [ 137.182693] ? kasan_populate_vmalloc_pte+0x171/0x190 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] kernel_init_freeable+0xa64/0x1050 [ 137.182693] ? __pfx_kernel_init+0x10/0x10 [ 137.182693] kernel_init+0x24/0x160 [ 137.182693] ? __switch_to_asm+0x3e/0x70 [ 137.182693] ret_from_fork+0x40/0x80 [ 137.182693] ? __pfx_kernel_init+0x10/0x10 [ 137.182693] ret_from_fork_asm+0x1b/0x30 [ 137.182693] [ 137.182693] Modules linked in: [ 137.538483] ---[ end trace 0000000000000000 ]--- Fixes: 1b05ece0c931 ("crypto: ccp - During shutdown, check SEV data pointer before using") Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Signed-off-by: Kim Phillips Reviewed-by: Liam Merwick Acked-by: John Allen Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/sev-dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index b8e02c3a19610..bbfb0f288dc35 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -515,10 +515,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init); static int __sev_platform_shutdown_locked(int *error) { - struct sev_device *sev = psp_master->sev_data; + struct psp_device *psp = psp_master; + struct sev_device *sev; int ret; - if (!sev || sev->state == SEV_STATE_UNINIT) + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + if (sev->state == SEV_STATE_UNINIT) return 0; ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); -- GitLab From 57b8478c103ab9992c2fbddc0c6db83f05587ab8 Mon Sep 17 00:00:00 2001 From: Daniel Basilio Date: Fri, 2 Feb 2024 13:37:17 +0200 Subject: [PATCH 0203/2290] nfp: use correct macro for LengthSelect in BAR config commit b3d4f7f2288901ed2392695919b3c0e24c1b4084 upstream. The 1st and 2nd expansion BAR configuration registers are configured, when the driver starts up, in variables 'barcfg_msix_general' and 'barcfg_msix_xpb', respectively. The 'LengthSelect' field is ORed in from bit 0, which is incorrect. The 'LengthSelect' field should start from bit 27. This has largely gone un-noticed because NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT happens to be 0. Fixes: 4cb584e0ee7d ("nfp: add CPP access core") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Daniel Basilio Signed-off-by: Louis Peens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 33b4c28563162..3f10c5365c80e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) const u32 barcfg_msix_general = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); const u32 barcfg_msix_xpb = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) | NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( NFP_CPP_TARGET_ISLAND_XPB); const u32 barcfg_explicit[4] = { -- GitLab From 685fc1711cdaf879c352a6a1d8787415c000b831 Mon Sep 17 00:00:00 2001 From: Daniel de Villiers Date: Fri, 2 Feb 2024 13:37:18 +0200 Subject: [PATCH 0204/2290] nfp: flower: prevent re-adding mac index for bonded port commit 1a1c13303ff6d64e6f718dc8aa614e580ca8d9b4 upstream. When physical ports are reset (either through link failure or manually toggled down and up again) that are slaved to a Linux bond with a tunnel endpoint IP address on the bond device, not all tunnel packets arriving on the bond port are decapped as expected. The bond dev assigns the same MAC address to itself and each of its slaves. When toggling a slave device, the same MAC address is therefore offloaded to the NFP multiple times with different indexes. The issue only occurs when re-adding the shared mac. The nfp_tunnel_add_shared_mac() function has a conditional check early on that checks if a mac entry already exists and if that mac entry is global: (entry && nfp_tunnel_is_mac_idx_global(entry->index)). In the case of a bonded device (For example br-ex), the mac index is obtained, and no new index is assigned. We therefore modify the conditional in nfp_tunnel_add_shared_mac() to check if the port belongs to the LAG along with the existing checks to prevent a new global mac index from being re-assigned to the slave port. Fixes: 20cce8865098 ("nfp: flower: enable MAC address sharing for offloadable devs") CC: stable@vger.kernel.org # 5.1+ Signed-off-by: Daniel de Villiers Signed-off-by: Louis Peens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 52f67157bd0f7..a3c52c91a575d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -980,7 +980,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); - if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) { if (entry->bridge_count || !nfp_flower_is_supported_bridge(netdev)) { nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, -- GitLab From 6c84dbe8f8faf1c96a4409accfa8f0337ef2d5d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Jan 2024 09:51:09 +0100 Subject: [PATCH 0205/2290] wifi: cfg80211: fix wiphy delayed work queueing commit b743287d7a0007493f5cada34ed2085d475050b4 upstream. When a wiphy work is queued with timer, and then again without a delay, it's started immediately but *also* started again after the timer expires. This can lead, for example, to warnings in mac80211's offchannel code as reported by Jouni. Running the same work twice isn't expected, of course. Fix this by deleting the timer at this point, when queuing immediately due to delay=0. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Link: https://msgid.link/20240125095108.2feb0eaaa446.I4617f3210ed0e7f252290d5970dac6a876aa595b@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 8809e668ed912..3fcddc8687ed4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1671,6 +1671,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, unsigned long delay) { if (!delay) { + del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } -- GitLab From 783912cbce88be94ac2f9d4c256bd61e708814ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:49:10 +0100 Subject: [PATCH 0206/2290] wifi: mac80211: reload info pointer in ieee80211_tx_dequeue() commit c98d8836b817d11fdff4ca7749cbbe04ff7f0c64 upstream. This pointer can change here since the SKB can change, so we actually later open-coded IEEE80211_SKB_CB() again. Reload the pointer where needed, so the monitor-mode case using it gets fixed, and then use info-> later as well. Cc: stable@vger.kernel.org Fixes: 531682159092 ("mac80211: fix VLAN handling with TXQs") Link: https://msgid.link/20240131164910.b54c28d583bc.I29450cec84ea6773cff5d9c16ff92b836c331471@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2db103a56a28f..322a035f75929 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -3838,6 +3838,7 @@ begin: goto begin; skb = __skb_dequeue(&tx.skbs); + info = IEEE80211_SKB_CB(skb); if (!skb_queue_empty(&tx.skbs)) { spin_lock_bh(&fq->lock); @@ -3882,7 +3883,7 @@ begin: } encap_out: - IEEE80211_SKB_CB(skb)->control.vif = vif; + info->control.vif = vif; if (tx.sta && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { -- GitLab From 659311f593188a3d6c6adcb7d9316993f9431a91 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Fri, 9 Feb 2024 17:24:49 -0800 Subject: [PATCH 0207/2290] irqchip/irq-brcmstb-l2: Add write memory barrier before exit commit b0344d6854d25a8b3b901c778b1728885dd99007 upstream. It was observed on Broadcom devices that use GIC v3 architecture L1 interrupt controllers as the parent of brcmstb-l2 interrupt controllers that the deactivation of the parent interrupt could happen before the brcmstb-l2 deasserted its output. This would lead the GIC to reactivate the interrupt only to find that no L2 interrupt was pending. The result was a spurious interrupt invoking handle_bad_irq() with its associated messaging. While this did not create a functional problem it is a waste of cycles. The hazard exists because the memory mapped bus writes to the brcmstb-l2 registers are buffered and the GIC v3 architecture uses a very efficient system register write to deactivate the interrupt. Add a write memory barrier prior to invoking chained_irq_exit() to introduce a dsb(st) on those systems to ensure the system register write cannot be executed until the memory mapped writes are visible to the system. [ florian: Added Fixes tag ] Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Signed-off-by: Thomas Gleixner Acked-by: Florian Fainelli Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240210012449.3009125-1-florian.fainelli@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-brcmstb-l2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 091b0fe7e3242..5d4421f75b43a 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -2,7 +2,7 @@ /* * Generic Broadcom Set Top Box Level 2 Interrupt controller driver * - * Copyright (C) 2014-2017 Broadcom + * Copyright (C) 2014-2024 Broadcom */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -113,6 +113,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) generic_handle_domain_irq(b->domain, irq); } while (status); out: + /* Don't ack parent before all device writes are done */ + wmb(); + chained_irq_exit(chip, desc); } -- GitLab From ce2b826582f5e8f802ca6314b7a1ae5c4a27fe08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2024 10:12:06 +0000 Subject: [PATCH 0208/2290] irqchip/gic-v3-its: Fix GICv4.1 VPE affinity update commit af9acbfc2c4b72c378d0b9a2ee023ed01055d3e2 upstream. When updating the affinity of a VPE, the VMOVP command is currently skipped if the two CPUs are part of the same VPE affinity. But this is wrong, as the doorbell corresponding to this VPE is still delivered on the 'old' CPU, which screws up the balancing. Furthermore, offlining that 'old' CPU results in doorbell interrupts generated for this VPE being discarded. The harsh reality is that VMOVP cannot be elided when a set_affinity() request occurs. It needs to be obeyed, and if an optimisation is to be made, it is at the point where the affinity change request is made (such as in KVM). Drop the VMOVP elision altogether, and only use the vpe_table_mask to try and stay within the same ITS affinity group if at all possible. Fixes: dd3f050a216e (irqchip/gic-v4.1: Implement the v4.1 flavour of VMOVP) Reported-by: Kunkun Jiang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240213101206.2137483-4-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 8956881503d9a..b83b39e93e1a9 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3805,8 +3805,9 @@ static int its_vpe_set_affinity(struct irq_data *d, bool force) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - int from, cpu = cpumask_first(mask_val); + struct cpumask common, *table_mask; unsigned long flags; + int from, cpu; /* * Changing affinity is mega expensive, so let's be as lazy as @@ -3822,19 +3823,22 @@ static int its_vpe_set_affinity(struct irq_data *d, * taken on any vLPI handling path that evaluates vpe->col_idx. */ from = vpe_to_cpuid_lock(vpe, &flags); - if (from == cpu) - goto out; - - vpe->col_idx = cpu; + table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; /* - * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD - * is sharing its VPE table with the current one. + * If we are offered another CPU in the same GICv4.1 ITS + * affinity, pick this one. Otherwise, any CPU will do. */ - if (gic_data_rdist_cpu(cpu)->vpe_table_mask && - cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) + if (table_mask && cpumask_and(&common, mask_val, table_mask)) + cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common); + else + cpu = cpumask_first(mask_val); + + if (from == cpu) goto out; + vpe->col_idx = cpu; + its_send_vmovp(vpe); its_vpe_db_proxy_move(vpe, from, cpu); -- GitLab From 09fad23a1a3249088e8f840685d10ae1650519a9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 8 Feb 2024 17:26:59 +0900 Subject: [PATCH 0209/2290] zonefs: Improve error handling commit 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 upstream. Write error handling is racy and can sometime lead to the error recovery path wrongly changing the inode size of a sequential zone file to an incorrect value which results in garbage data being readable at the end of a file. There are 2 problems: 1) zonefs_file_dio_write() updates a zone file write pointer offset after issuing a direct IO with iomap_dio_rw(). This update is done only if the IO succeed for synchronous direct writes. However, for asynchronous direct writes, the update is done without waiting for the IO completion so that the next asynchronous IO can be immediately issued. However, if an asynchronous IO completes with a failure right before the i_truncate_mutex lock protecting the update, the update may change the value of the inode write pointer offset that was corrected by the error path (zonefs_io_error() function). 2) zonefs_io_error() is called when a read or write error occurs. This function executes a report zone operation using the callback function zonefs_io_error_cb(), which does all the error recovery handling based on the current zone condition, write pointer position and according to the mount options being used. However, depending on the zoned device being used, a report zone callback may be executed in a context that is different from the context of __zonefs_io_error(). As a result, zonefs_io_error_cb() may be executed without the inode truncate mutex lock held, which can lead to invalid error processing. Fix both problems as follows: - Problem 1: Perform the inode write pointer offset update before a direct write is issued with iomap_dio_rw(). This is safe to do as partial direct writes are not supported (IOMAP_DIO_PARTIAL is not set) and any failed IO will trigger the execution of zonefs_io_error() which will correct the inode write pointer offset to reflect the current state of the one on the device. - Problem 2: Change zonefs_io_error_cb() into zonefs_handle_io_error() and call this function directly from __zonefs_io_error() after obtaining the zone information using blkdev_report_zones() with a simple callback function that copies to a local stack variable the struct blk_zone obtained from the device. This ensures that error handling is performed holding the inode truncate mutex. This change also simplifies error handling for conventional zone files by bypassing the execution of report zones entirely. This is safe to do because the condition of conventional zones cannot be read-only or offline and conventional zone files are always fully mapped with a constant file size. Reported-by: Shin'ichiro Kawasaki Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani Signed-off-by: Greg Kroah-Hartman --- fs/zonefs/file.c | 42 +++++++++++++++++++----------- fs/zonefs/super.c | 66 +++++++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 63cd50840419c..8d5f4a5a74e65 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -349,7 +349,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, struct zonefs_inode_info *zi = ZONEFS_I(inode); if (error) { - zonefs_io_error(inode, true); + /* + * For Sync IOs, error recovery is called from + * zonefs_file_dio_write(). + */ + if (!is_sync_kiocb(iocb)) + zonefs_io_error(inode, true); return error; } @@ -577,6 +582,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EINVAL; goto inode_unlock; } + /* + * Advance the zone write pointer offset. This assumes that the + * IO will succeed, which is OK to do because we do not allow + * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO + * fails, the error path will correct the write pointer offset. + */ + z->z_wpoffset += count; + zonefs_inode_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); append = sync; } @@ -596,20 +609,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EBUSY; } - if (zonefs_zone_is_seq(z) && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) - count = ret; - - /* - * Update the zone write pointer offset assuming the write - * operation succeeded. If it did not, the error recovery path - * will correct it. Also do active seq file accounting. - */ - mutex_lock(&zi->i_truncate_mutex); - z->z_wpoffset += count; - zonefs_inode_account_active(inode); - mutex_unlock(&zi->i_truncate_mutex); + /* + * For a failed IO or partial completion, trigger error recovery + * to update the zone write pointer offset to a correct value. + * For asynchronous IOs, zonefs_file_write_dio_end_io() may already + * have executed error recovery if the IO already completed when we + * reach here. However, we cannot know that and execute error recovery + * again (that will not change anything). + */ + if (zonefs_zone_is_seq(z)) { + if (ret > 0 && ret != count) + ret = -EIO; + if (ret < 0 && ret != -EIOCBQUEUED) + zonefs_io_error(inode, true); } inode_unlock: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 270ded209dde5..f6b701261078c 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -245,16 +245,18 @@ static void zonefs_inode_update_mode(struct inode *inode) z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; } -struct zonefs_ioerr_data { - struct inode *inode; - bool write; -}; - static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { - struct zonefs_ioerr_data *err = data; - struct inode *inode = err->inode; + struct blk_zone *z = data; + + *z = *zone; + return 0; +} + +static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, + bool write) +{ struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); @@ -269,8 +271,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && - !err->write && isize == data_size) - return 0; + !write && isize == data_size) + return; /* * At this point, we detected either a bad zone or an inconsistency @@ -291,7 +293,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ - if (zonefs_zone_is_seq(z) && isize != data_size) + if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); @@ -351,8 +353,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); - - return 0; } /* @@ -366,23 +366,25 @@ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = 1; - struct zonefs_ioerr_data err = { - .inode = inode, - .write = write, - }; + struct blk_zone zone; int ret; /* - * The only files that have more than one zone are conventional zone - * files with aggregated conventional zones, for which the inode zone - * size is always larger than the device zone size. + * Conventional zone have no write pointer and cannot become read-only + * or offline. So simply fake a report for a single or aggregated zone + * and let zonefs_handle_io_error() correct the zone inode information + * according to the mount options. */ - if (z->z_size > bdev_zone_sectors(sb->s_bdev)) - nr_zones = z->z_size >> - (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + if (!zonefs_zone_is_seq(z)) { + zone.start = z->z_sector; + zone.len = z->z_size >> SECTOR_SHIFT; + zone.wp = zone.start + zone.len; + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + zone.cond = BLK_ZONE_COND_NOT_WP; + zone.capacity = zone.len; + goto handle_io_error; + } /* * Memory allocations in blkdev_report_zones() can trigger a memory @@ -393,12 +395,20 @@ void __zonefs_io_error(struct inode *inode, bool write) * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, - zonefs_io_error_cb, &err); - if (ret != nr_zones) + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, + zonefs_io_error_cb, &zone); + memalloc_noio_restore(noio_flag); + + if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); - memalloc_noio_restore(noio_flag); + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + return; + } + +handle_io_error: + zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; -- GitLab From 00f9fcc0a109c01b62550abaf063f05635f649fb Mon Sep 17 00:00:00 2001 From: Fred Ai Date: Sat, 3 Feb 2024 02:29:08 -0800 Subject: [PATCH 0210/2290] mmc: sdhci-pci-o2micro: Fix a warm reboot issue that disk can't be detected by BIOS commit 58aeb5623c2ebdadefe6352b14f8076a7073fea0 upstream. Driver shall switch clock source from DLL clock to OPE clock when power off card to ensure that card can be identified with OPE clock by BIOS. Signed-off-by: Fred Ai Fixes:4be33cf18703 ("mmc: sdhci-pci-o2micro: Improve card input timing at SDR104/HS200 mode") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240203102908.4683-1-fredaibayhubtech@126.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index bca1d095b7597..24bb0e9809e76 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -602,6 +602,35 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_o2_enable_clk(host, clk); } +static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot = sdhci_priv(host); + u32 scratch_32 = 0; + u8 scratch_8 = 0; + + chip = slot->chip; + + if (mode == MMC_POWER_OFF) { + /* UnLock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 &= 0x7f; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + + /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */ + pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32); + scratch_32 &= ~(O2_SD_SEL_DLL); + pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32); + + /* Lock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 |= 0x80; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + } + + sdhci_set_power(host, mode, vdd); +} + static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) { struct sdhci_pci_chip *chip; @@ -911,6 +940,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_pci_o2_set_power, }; const struct sdhci_pci_fixes sdhci_o2 = { -- GitLab From fcf62f94ad80c3c3b602346ca74665d2ee2386a3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 5 Feb 2024 15:48:53 -0600 Subject: [PATCH 0211/2290] ASoC: amd: yc: Add DMI quirk for Lenovo Ideapad Pro 5 16ARP8 commit 610010737f74482a61896596a0116876ecf9e65c upstream. The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Stanislav Petrov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216925 Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240205214853.2689-1-mario.limonciello@amd.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 3b43595aa87a7..28da4e1858d7e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -241,6 +241,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From d627693e5a55f3e15369f28406da08447d118597 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:32 +0100 Subject: [PATCH 0212/2290] tools/rtla: Remove unused sched_getattr() function commit 084ce16df0f060efd371092a09a7ae74a536dc11 upstream. Clang is reporting: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c src/utils.c:241:19: warning: unused function 'sched_getattr' [-Wunused-function] 241 | static inline int sched_getattr(pid_t pid, struct sched_attr *attr, | ^~~~~~~~~~~~~ 1 warning generated. Which is correct, so remove the unused function. Link: https://lkml.kernel.org/r/eaed7ba122c4ae88ce71277c824ef41cbf789385.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: b1696371d865 ("rtla: Helper functions for rtla") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/utils.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 663a047f794d2..c9c4af0ad267d 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -243,12 +243,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, return syscall(__NR_sched_setattr, pid, attr, flags); } -static inline int sched_getattr(pid_t pid, struct sched_attr *attr, - unsigned int size, unsigned int flags) -{ - return syscall(__NR_sched_getattr, pid, attr, size, flags); -} - int __set_sched_attr(int pid, struct sched_attr *attr) { int flags = 0; -- GitLab From 771b74ce921269bbbcca1505a288ee1a174eb0ef Mon Sep 17 00:00:00 2001 From: limingming3 Date: Wed, 7 Feb 2024 14:51:42 +0800 Subject: [PATCH 0213/2290] tools/rtla: Replace setting prio with nice for SCHED_OTHER commit 14f08c976ffe0d2117c6199c32663df1cbc45c65 upstream. Since the sched_priority for SCHED_OTHER is always 0, it makes no sence to set it. Setting nice for SCHED_OTHER seems more meaningful. Link: https://lkml.kernel.org/r/20240207065142.1753909-1-limingming3@lixiang.com Cc: stable@vger.kernel.org Fixes: b1696371d865 ("rtla: Helper functions for rtla") Signed-off-by: limingming3 Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/utils.c | 6 +++--- tools/tracing/rtla/src/utils.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index c9c4af0ad267d..8c8d63c7196cf 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -478,13 +478,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param) if (prio == INVALID_VAL) return -1; - if (prio < sched_get_priority_min(SCHED_OTHER)) + if (prio < MIN_NICE) return -1; - if (prio > sched_get_priority_max(SCHED_OTHER)) + if (prio > MAX_NICE) return -1; sched_param->sched_policy = SCHED_OTHER; - sched_param->sched_priority = prio; + sched_param->sched_nice = prio; break; default: return -1; diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 5571afd3b5498..92da41aaf4c4c 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -7,6 +7,8 @@ */ #define BUFF_U64_STR_SIZE 24 #define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ -- GitLab From 5ccb527b66e6e021dc780561ed32c11adf767065 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Fri, 2 Feb 2024 19:16:07 -0500 Subject: [PATCH 0214/2290] tools/rtla: Exit with EXIT_SUCCESS when help is invoked commit b5f319360371087d52070d8f3fc7789e80ce69a6 upstream. Fix rtla so that the following commands exit with 0 when help is invoked rtla osnoise top -h rtla osnoise hist -h rtla timerlat top -h rtla timerlat hist -h Link: https://lore.kernel.org/linux-trace-devel/20240203001607.69703-1-jkacur@redhat.com Cc: stable@vger.kernel.org Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Signed-off-by: John Kacur Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/osnoise_hist.c | 6 +++++- tools/tracing/rtla/src/osnoise_top.c | 6 +++++- tools/tracing/rtla/src/timerlat_hist.c | 6 +++++- tools/tracing/rtla/src/timerlat_top.c | 6 +++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index fe34452fc4ec0..ce49c352b666e 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -472,7 +472,11 @@ static void osnoise_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 76479bfb29224..6c07f360de72c 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -282,7 +282,11 @@ void osnoise_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a83096..cdfe848113456 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -475,7 +475,11 @@ static void timerlat_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 3342719352222..8fc0f6aa19dad 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -305,7 +305,11 @@ static void timerlat_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* -- GitLab From 4ee28d5a4f57f3556c228b9adbaf9049700497c4 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:30 +0100 Subject: [PATCH 0215/2290] tools/rtla: Fix uninitialized bucket/data->bucket_size warning commit 64dc40f7523369912d7adb22c8cb655f71610505 upstream. When compiling rtla with clang, I am getting the following warnings: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [..] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/osnoise_hist.o src/osnoise_hist.c src/osnoise_hist.c:138:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 138 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~ src/osnoise_hist.c:149:6: note: uninitialized use occurs here 149 | if (bucket < entries) | ^~~~~~ src/osnoise_hist.c:138:2: note: remove the 'if' if its condition is always true 138 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~~~~~~ 139 | bucket = duration / data->bucket_size; src/osnoise_hist.c:132:12: note: initialize the variable 'bucket' to silence this warning 132 | int bucket; | ^ | = 0 1 warning generated. [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/timerlat_hist.o src/timerlat_hist.c src/timerlat_hist.c:181:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 181 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~ src/timerlat_hist.c:204:6: note: uninitialized use occurs here 204 | if (bucket < entries) | ^~~~~~ src/timerlat_hist.c:181:2: note: remove the 'if' if its condition is always true 181 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~~~~~~ 182 | bucket = latency / data->bucket_size; src/timerlat_hist.c:175:12: note: initialize the variable 'bucket' to silence this warning 175 | int bucket; | ^ | = 0 1 warning generated. This is a legit warning, but data->bucket_size is always > 0 (see timerlat_hist_parse_args()), so the if is not necessary. Remove the unneeded if (data->bucket_size) to avoid the warning. Link: https://lkml.kernel.org/r/6e1b1665cd99042ae705b3e0fc410858c4c42346.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Fixes: 829a6c0b5698 ("rtla/osnoise: Add the hist mode") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/osnoise_hist.c | 3 +-- tools/tracing/rtla/src/timerlat_hist.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index ce49c352b666e..b9658f213cb55 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -129,8 +129,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, if (params->output_divisor) duration = duration / params->output_divisor; - if (data->bucket_size) - bucket = duration / data->bucket_size; + bucket = duration / data->bucket_size; total_duration = duration * count; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index cdfe848113456..ed08295bfa12c 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -151,8 +151,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (params->output_divisor) latency = latency / params->output_divisor; - if (data->bucket_size) - bucket = latency / data->bucket_size; + bucket = latency / data->bucket_size; if (!thread) { hist = data->hist[cpu].irq; -- GitLab From 3ff3e6a9363ab577ec720f6234c0661ea86be459 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:29 +0100 Subject: [PATCH 0216/2290] tools/rtla: Fix Makefile compiler options for clang commit bc4cbc9d260ba8358ca63662919f4bb223cb603b upstream. The following errors are showing up when compiling rtla with clang: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc1\" -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c clang: warning: optimization flag '-ffat-lto-objects' is not supported [-Wignored-optimization-argument] warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] 1 warning generated. clang -o rtla -ggdb src/osnoise.o src/osnoise_hist.o src/osnoise_top.o src/rtla.o src/timerlat_aa.o src/timerlat.o src/timerlat_hist.o src/timerlat_top.o src/timerlat_u.o src/trace.o src/utils.o $(pkg-config --libs libtracefs) src/osnoise.o: file not recognized: file format not recognized clang: error: linker command failed with exit code 1 (use -v to see invocation) make: *** [Makefile:110: rtla] Error 1 Solve these issues by: - removing -ffat-lto-objects and -Wno-maybe-uninitialized if using clang - informing the linker about -flto=auto Link: https://lore.kernel.org/linux-trace-kernel/567ac1b94effc228ce9a0225b9df7232a9b35b55.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Fixes: 1a7b22ab15eb ("tools/rtla: Build with EXTRA_{C,LD}FLAGS") Suggested-by: Donald Zickus Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 22e28b76f8004..6912e9577b658 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized +ifeq ($(CC),clang) + FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) -- GitLab From 95de4ad173ca0e61034f3145d66917970961c210 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 6 Feb 2024 11:22:09 +0100 Subject: [PATCH 0217/2290] fs: relax mount_setattr() permission checks commit 46f5ab762d048dad224436978315cbc2fa79c630 upstream. When we added mount_setattr() I added additional checks compared to the legacy do_reconfigure_mnt() and do_change_type() helpers used by regular mount(2). If that mount had a parent then verify that the caller and the mount namespace the mount is attached to match and if not make sure that it's an anonymous mount. The real rootfs falls into neither category. It is neither an anoymous mount because it is obviously attached to the initial mount namespace but it also obviously doesn't have a parent mount. So that means legacy mount(2) allows changing mount properties on the real rootfs but mount_setattr(2) blocks this. I never thought much about this but of course someone on this planet of earth changes properties on the real rootfs as can be seen in [1]. Since util-linux finally switched to the new mount api in 2.39 not so long ago it also relies on mount_setattr() and that surfaced this issue when Fedora 39 finally switched to it. Fix this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2256843 Link: https://lore.kernel.org/r/20240206-vfs-mount-rootfs-v1-1-19b335eee133@kernel.org Reviewed-by: Jan Kara Reported-by: Karel Zak Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 29a8d90dd1072..1533550f73567 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4172,10 +4172,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) /* * If this is an attached mount make sure it's located in the callers * mount namespace. If it's not don't let the caller interact with it. - * If this is a detached mount make sure it has an anonymous mount - * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. + * + * If this mount doesn't have a parent it's most often simply a + * detached mount with an anonymous mount namespace. IOW, something + * that's simply not attached yet. But there are apparently also users + * that do change mount properties on the rootfs itself. That obviously + * neither has a parent nor is it a detached mount so we cannot + * unconditionally check for detached mounts. */ - if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) + if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) goto out; /* -- GitLab From 058fbaf7716b8b16c92566d5e373fb8f28ba0e89 Mon Sep 17 00:00:00 2001 From: Sinthu Raja Date: Tue, 6 Feb 2024 06:29:28 +0530 Subject: [PATCH 0218/2290] net: ethernet: ti: cpsw: enable mac_managed_pm to fix mdio commit bc4ce46b1e3d1da4309405cd4afc7c0fcddd0b90 upstream. The below commit introduced a WARN when phy state is not in the states: PHY_HALTED, PHY_READY and PHY_UP. commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") When cpsw resumes, there have port in PHY_NOLINK state, so the below warning comes out. Set mac_managed_pm be true to tell mdio that the phy resume/suspend is managed by the mac, to fix the following warning: WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 Hardware name: Generic AM33XX (Flattened Device Tree) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x24/0x2c dump_stack_lvl from __warn+0x84/0x15c __warn from warn_slowpath_fmt+0x1a8/0x1c8 warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 dpm_run_callback from device_resume+0xb8/0x2b8 device_resume from dpm_resume+0x144/0x314 dpm_resume from dpm_resume_end+0x14/0x20 dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 suspend_devices_and_enter from pm_suspend+0x2e0/0x33c pm_suspend from state_store+0x74/0xd0 state_store from kernfs_fop_write_iter+0x104/0x1ec kernfs_fop_write_iter from vfs_write+0x1b8/0x358 vfs_write from ksys_write+0x78/0xf8 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe094dfa8 to 0xe094dff0) dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 Cc: # v6.0+ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Signed-off-by: Sinthu Raja Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 13c9c2d6b79bb..d95771ca4e5a3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } } + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); -- GitLab From 5140c4d5f4fd978b0ad8a2471df1ceb03324922a Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 6 Feb 2024 09:58:49 +0100 Subject: [PATCH 0219/2290] s390/qeth: Fix potential loss of L3-IP@ in case of network issues commit 2fe8a236436fe40d8d26a1af8d150fc80f04ee1a upstream. Symptom: In case of a bad cable connection (e.g. dirty optics) a fast sequence of network DOWN-UP-DOWN-UP could happen. UP triggers recovery of the qeth interface. In case of a second DOWN while recovery is still ongoing, it can happen that the IP@ of a Layer3 qeth interface is lost and will not be recovered by the second UP. Problem: When registration of IP addresses with Layer 3 qeth devices fails, (e.g. because of bad address format) the respective IP address is deleted from its hash-table in the driver. If registration fails because of a ENETDOWN condition, the address should stay in the hashtable, so a subsequent recovery can restore it. 3caa4af834df ("qeth: keep ip-address after LAN_OFFLINE failure") fixes this for registration failures during normal operation, but not during recovery. Solution: Keep L3-IP address in case of ENETDOWN in qeth_l3_recover_ip(). For consistency with qeth_l3_add_ip() we also keep it in case of EADDRINUSE, i.e. for some reason the card already/still has this address registered. Fixes: 4a71df50047f ("qeth: new qeth device driver") Cc: stable@vger.kernel.org Signed-off-by: Alexandra Winter Link: https://lore.kernel.org/r/20240206085849.2902775-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c0f30cefec102..a416011391856 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -254,9 +254,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) if (!recover) { hash_del(&addr->hnode); kfree(addr); - continue; + } else { + /* prepare for recovery */ + addr->disp_flag = QETH_DISP_ADDR_ADD; } - addr->disp_flag = QETH_DISP_ADDR_ADD; } mutex_unlock(&card->ip_lock); @@ -277,9 +278,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (addr->disp_flag == QETH_DISP_ADDR_ADD) { rc = qeth_l3_register_addr_entry(card, addr); - if (!rc) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { + /* keep it in the records */ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; } else { + /* bad address */ hash_del(&addr->hnode); kfree(addr); } -- GitLab From 4888754f3dd04154ef85a5535be935161f219315 Mon Sep 17 00:00:00 2001 From: Sinthu Raja Date: Tue, 6 Feb 2024 06:29:27 +0530 Subject: [PATCH 0220/2290] net: ethernet: ti: cpsw_new: enable mac_managed_pm to fix mdio commit 9def04e759caa5a3d741891037ae99f81e2fff01 upstream. The below commit introduced a WARN when phy state is not in the states: PHY_HALTED, PHY_READY and PHY_UP. commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") When cpsw_new resumes, there have port in PHY_NOLINK state, so the below warning comes out. Set mac_managed_pm be true to tell mdio that the phy resume/suspend is managed by the mac, to fix the following warning: WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 Hardware name: Generic AM33XX (Flattened Device Tree) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x24/0x2c dump_stack_lvl from __warn+0x84/0x15c __warn from warn_slowpath_fmt+0x1a8/0x1c8 warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 dpm_run_callback from device_resume+0xb8/0x2b8 device_resume from dpm_resume+0x144/0x314 dpm_resume from dpm_resume_end+0x14/0x20 dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 suspend_devices_and_enter from pm_suspend+0x2e0/0x33c pm_suspend from state_store+0x74/0xd0 state_store from kernfs_fop_write_iter+0x104/0x1ec kernfs_fop_write_iter from vfs_write+0x1b8/0x358 vfs_write from ksys_write+0x78/0xf8 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe094dfa8 to 0xe094dff0) dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 Cc: # v6.0+ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Signed-off-by: Sinthu Raja Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw_new.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 83596ec0c7cb9..6e70aa1cc7bf1 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -772,6 +772,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->slave_num); return; } + + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); -- GitLab From 309ef7de5d840e17607e7d65cbf297c0564433ef Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Thu, 1 Feb 2024 20:40:38 -0800 Subject: [PATCH 0221/2290] hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 upstream. If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER handler cannot perform VF register successfully as the register call is received before netvsc_probe is finished. This is because we register register_netdevice_notifier() very early( even before vmbus_driver_register()). To fix this, we try to register each such matching VF( if it is visible as a netdevice) at the end of netvsc_probe. Cc: stable@vger.kernel.org Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register") Suggested-by: Dexuan Cui Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Dexuan Cui Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c1aac6ceb29e6..1b74055399840 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,6 +42,10 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); @@ -2181,7 +2185,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) } static int netvsc_vf_join(struct net_device *vf_netdev, - struct net_device *ndev) + struct net_device *ndev, int context) { struct net_device_context *ndev_ctx = netdev_priv(ndev); int ret; @@ -2204,7 +2208,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + /* If this registration is called from probe context vf_takeover + * is taken care of later in probe itself. + */ + if (context == VF_REG_IN_NOTIFIER) + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2342,7 +2350,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) return NOTIFY_DONE; } -static int netvsc_register_vf(struct net_device *vf_netdev) +static int netvsc_register_vf(struct net_device *vf_netdev, int context) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; @@ -2382,7 +2390,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - if (netvsc_vf_join(vf_netdev, ndev) != 0) + if (netvsc_vf_join(vf_netdev, ndev, context) != 0) return NOTIFY_DONE; dev_hold(vf_netdev); @@ -2480,10 +2488,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static int check_dev_is_matching_vf(struct net_device *event_ndev) +{ + /* Skip NetVSC interfaces */ + if (event_ndev->netdev_ops == &device_ops) + return -ENODEV; + + /* Avoid non-Ethernet type devices */ + if (event_ndev->type != ARPHRD_ETHER) + return -ENODEV; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_ndev)) + return -ENODEV; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (netif_is_bond_master(event_ndev)) + return -ENODEV; + + return 0; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - struct net_device *net = NULL; + struct net_device *net = NULL, *vf_netdev; struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; @@ -2592,6 +2621,30 @@ static int netvsc_probe(struct hv_device *dev, } list_add(&net_device_ctx->list, &netvsc_dev_list); + + /* When the hv_netvsc driver is unloaded and reloaded, the + * NET_DEVICE_REGISTER for the vf device is replayed before probe + * is complete. This is because register_netdevice_notifier() gets + * registered before vmbus_driver_register() so that callback func + * is set before probe and we don't miss events like NETDEV_POST_INIT + * So, in this section we try to register the matching vf device that + * is present as a netdevice, knowing that its register call is not + * processed in the netvsc_netdev_notifier(as probing is progress and + * get_netvsc_byslot fails). + */ + for_each_netdev(dev_net(net), vf_netdev) { + ret = check_dev_is_matching_vf(vf_netdev); + if (ret != 0) + continue; + + if (net != get_netvsc_byslot(vf_netdev)) + continue; + + netvsc_prepare_bonding(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + __netvsc_vf_setup(net, vf_netdev); + break; + } rtnl_unlock(); netvsc_devinfo_put(device_info); @@ -2748,28 +2801,17 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + int ret = 0; - /* Skip our own events */ - if (event_dev->netdev_ops == &device_ops) - return NOTIFY_DONE; - - /* Avoid non-Ethernet type devices */ - if (event_dev->type != ARPHRD_ETHER) - return NOTIFY_DONE; - - /* Avoid Vlan dev with same MAC registering as VF */ - if (is_vlan_dev(event_dev)) - return NOTIFY_DONE; - - /* Avoid Bonding master dev with same MAC registering as VF */ - if (netif_is_bond_master(event_dev)) + ret = check_dev_is_matching_vf(event_dev); + if (ret != 0) return NOTIFY_DONE; switch (event) { case NETDEV_POST_INIT: return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: - return netvsc_register_vf(event_dev); + return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: -- GitLab From f3f98d7d84b31828004545e29fd7262b9f444139 Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Thu, 1 Feb 2024 17:07:16 +0530 Subject: [PATCH 0222/2290] ceph: prevent use-after-free in encode_cap_msg() commit cda4672da1c26835dcbd7aec2bfed954eda9b5ef upstream. In fs/ceph/caps.c, in encode_cap_msg(), "use after free" error was caught by KASAN at this line - 'ceph_buffer_get(arg->xattr_buf);'. This implies before the refcount could be increment here, it was freed. In same file, in "handle_cap_grant()" refcount is decremented by this line - 'ceph_buffer_put(ci->i_xattrs.blob);'. It appears that a race occurred and resource was freed by the latter line before the former line could increment it. encode_cap_msg() is called by __send_cap() and __send_cap() is called by ceph_check_caps() after calling __prep_cap(). __prep_cap() is where arg->xattr_buf is assigned to ci->i_xattrs.blob. This is the spot where the refcount must be increased to prevent "use after free" error. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/59259 Signed-off-by: Rishabh Dave Reviewed-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 111938a6307e6..57603782e7e2a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1391,7 +1391,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, if (flushing & CEPH_CAP_XATTR_EXCL) { arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); arg->xattr_version = ci->i_xattrs.version; - arg->xattr_buf = ci->i_xattrs.blob; + arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); } else { arg->xattr_buf = NULL; arg->old_xattr_buf = NULL; @@ -1457,6 +1457,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) encode_cap_msg(msg, arg); ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); + ceph_buffer_put(arg->xattr_buf); if (arg->wake) wake_up_all(&ci->i_cap_wq); } -- GitLab From 2e2c07104b4904aed1389a59b25799b95a85b5b9 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 30 Jan 2024 22:04:18 +0100 Subject: [PATCH 0223/2290] fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super commit 79d72c68c58784a3e1cd2378669d51bfd0cb7498 upstream. When configuring a hugetlb filesystem via the fsconfig() syscall, there is a possible NULL dereference in hugetlbfs_fill_super() caused by assigning NULL to ctx->hstate in hugetlbfs_parse_param() when the requested pagesize is non valid. E.g: Taking the following steps: fd = fsopen("hugetlbfs", FSOPEN_CLOEXEC); fsconfig(fd, FSCONFIG_SET_STRING, "pagesize", "1024", 0); fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); Given that the requested "pagesize" is invalid, ctxt->hstate will be replaced with NULL, losing its previous value, and we will print an error: ... ... case Opt_pagesize: ps = memparse(param->string, &rest); ctx->hstate = h; if (!ctx->hstate) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } return 0; ... ... This is a problem because later on, we will dereference ctxt->hstate in hugetlbfs_fill_super() ... ... sb->s_blocksize = huge_page_size(ctx->hstate); ... ... Causing below Oops. Fix this by replacing cxt->hstate value only when then pagesize is known to be valid. kernel: hugetlbfs: Unsupported page size 0 MB kernel: BUG: kernel NULL pointer dereference, address: 0000000000000028 kernel: #PF: supervisor read access in kernel mode kernel: #PF: error_code(0x0000) - not-present page kernel: PGD 800000010f66c067 P4D 800000010f66c067 PUD 1b22f8067 PMD 0 kernel: Oops: 0000 [#1] PREEMPT SMP PTI kernel: CPU: 4 PID: 5659 Comm: syscall Tainted: G E 6.8.0-rc2-default+ #22 5a47c3fef76212addcc6eb71344aabc35190ae8f kernel: Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 kernel: Call Trace: kernel: kernel: ? __die_body+0x1a/0x60 kernel: ? page_fault_oops+0x16f/0x4a0 kernel: ? search_bpf_extables+0x65/0x70 kernel: ? fixup_exception+0x22/0x310 kernel: ? exc_page_fault+0x69/0x150 kernel: ? asm_exc_page_fault+0x22/0x30 kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 kernel: ? hugetlbfs_fill_super+0xb4/0x1a0 kernel: ? hugetlbfs_fill_super+0x28/0x1a0 kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 kernel: vfs_get_super+0x40/0xa0 kernel: ? __pfx_bpf_lsm_capable+0x10/0x10 kernel: vfs_get_tree+0x25/0xd0 kernel: vfs_cmd_create+0x64/0xe0 kernel: __x64_sys_fsconfig+0x395/0x410 kernel: do_syscall_64+0x80/0x160 kernel: ? syscall_exit_to_user_mode+0x82/0x240 kernel: ? do_syscall_64+0x8d/0x160 kernel: ? syscall_exit_to_user_mode+0x82/0x240 kernel: ? do_syscall_64+0x8d/0x160 kernel: ? exc_page_fault+0x69/0x150 kernel: entry_SYSCALL_64_after_hwframe+0x6e/0x76 kernel: RIP: 0033:0x7ffbc0cb87c9 kernel: Code: 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 96 0d 00 f7 d8 64 89 01 48 kernel: RSP: 002b:00007ffc29d2f388 EFLAGS: 00000206 ORIG_RAX: 00000000000001af kernel: RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffbc0cb87c9 kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 kernel: RBP: 00007ffc29d2f3b0 R08: 0000000000000000 R09: 0000000000000000 kernel: R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 kernel: R13: 00007ffc29d2f4c0 R14: 0000000000000000 R15: 0000000000000000 kernel: kernel: Modules linked in: rpcsec_gss_krb5(E) auth_rpcgss(E) nfsv4(E) dns_resolver(E) nfs(E) lockd(E) grace(E) sunrpc(E) netfs(E) af_packet(E) bridge(E) stp(E) llc(E) iscsi_ibft(E) iscsi_boot_sysfs(E) intel_rapl_msr(E) intel_rapl_common(E) iTCO_wdt(E) intel_pmc_bxt(E) sb_edac(E) iTCO_vendor_support(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) rfkill(E) ipmi_ssif(E) kvm(E) acpi_ipmi(E) irqbypass(E) pcspkr(E) igb(E) ipmi_si(E) mei_me(E) i2c_i801(E) joydev(E) intel_pch_thermal(E) i2c_smbus(E) dca(E) lpc_ich(E) mei(E) ipmi_devintf(E) ipmi_msghandler(E) acpi_pad(E) tiny_power_button(E) button(E) fuse(E) efi_pstore(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) sd_mod(E) t10_pi(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) polyval_clmulni(E) ahci(E) xhci_pci(E) polyval_generic(E) gf128mul(E) ghash_clmulni_intel(E) sha512_ssse3(E) sha256_ssse3(E) xhci_pci_renesas(E) libahci(E) ehci_pci(E) sha1_ssse3(E) xhci_hcd(E) ehci_hcd(E) libata(E) kernel: mgag200(E) i2c_algo_bit(E) usbcore(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) aesni_intel(E) crypto_simd(E) cryptd(E) kernel: Unloaded tainted modules: acpi_cpufreq(E):1 fjes(E):1 kernel: CR2: 0000000000000028 kernel: ---[ end trace 0000000000000000 ]--- kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 Link: https://lkml.kernel.org/r/20240130210418.3771-1-osalvador@suse.de Fixes: 32021982a324 ("hugetlbfs: Convert to fs_context") Signed-off-by: Michal Hocko Signed-off-by: Oscar Salvador Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8eea709e36599..ab0d30af7d9c3 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1350,6 +1350,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par { struct hugetlbfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; + struct hstate *h; char *rest; unsigned long ps; int opt; @@ -1394,11 +1395,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_pagesize: ps = memparse(param->string, &rest); - ctx->hstate = size_to_hstate(ps); - if (!ctx->hstate) { + h = size_to_hstate(ps); + if (!h) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } + ctx->hstate = h; return 0; case Opt_min_size: -- GitLab From 79081197b4e235fa821302f268400e1c24486022 Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Tue, 23 Jan 2024 12:04:42 -0800 Subject: [PATCH 0224/2290] mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE commit e656c7a9e59607d1672d85ffa9a89031876ffe67 upstream. For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages are not reserved. However when the shared memory is attached with the shmat() call the hugetlb pages are getting reserved incorrectly for SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug. ------------------------------- Following test shows the issue. $cat shmhtb.c int main() { int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE; int shmid; shmid = shmget(SKEY, SHMSZ, shmflags); if (shmid < 0) { printf("shmat: shmget() failed, %d\n", errno); return 1; } printf("After shmget()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmat(shmid, NULL, 0); printf("\nAfter shmat()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmctl(shmid, IPC_RMID, NULL); return 0; } #sysctl -w vm.nr_hugepages=20 #./shmhtb After shmget() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 0 HugePages_Surp: 0 After shmat() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 5 <-- HugePages_Surp: 0 -------------------------------- Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared memory in the shmat() call. Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ab0d30af7d9c3..4fe4b3393e71c 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -123,6 +123,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -164,10 +165,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; -- GitLab From 08c1948823765a9a11e7782380ad1b6b55fcb595 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 23 Jan 2024 16:14:22 +0100 Subject: [PATCH 0225/2290] of: property: fix typo in io-channels commit 8f7e917907385e112a845d668ae2832f41e64bf5 upstream. The property is io-channels and not io-channel. This was effectively preventing the devlink creation. Fixes: 8e12257dead7 ("of: property: Add device link support for iommus, mboxes and io-channels") Cc: stable@vger.kernel.org Signed-off-by: Nuno Sa Reviewed-by: Saravana Kannan Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240123-iio-backend-v7-1-1bff236b8693@analog.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index e1946cc170309..550efd1a58fc6 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1243,7 +1243,7 @@ DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") -DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells") +DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") -- GitLab From 8a72a4689a8d0d228932985575c76ff5176e1086 Mon Sep 17 00:00:00 2001 From: Maxime Jayat Date: Mon, 6 Nov 2023 19:01:58 +0100 Subject: [PATCH 0226/2290] can: netlink: Fix TDCO calculation using the old data bittiming commit 2aa0a5e65eae27dbd96faca92c84ecbf6f492d42 upstream. The TDCO calculation was done using the currently applied data bittiming, instead of the newly computed data bittiming, which means that the TDCO had an invalid value unless setting the same data bittiming twice. Fixes: d99755f71a80 ("can: netlink: add interface for CAN-FD Transmitter Delay Compensation (TDC)") Signed-off-by: Maxime Jayat Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/all/40579c18-63c0-43a4-8d4c-f3a6c1c0b417@munic.io Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 8efa22d9f214d..053d375eae4f5 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -311,7 +311,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Neither of TDC parameters nor TDC flags are * provided: do calculation */ - can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming, + can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, &priv->ctrlmode, priv->ctrlmode_supported); } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly * turned off. TDC is disabled: do nothing -- GitLab From aedda066d717a0b4335d7e0a00b2e3a61e40afcf Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 21 Jul 2023 09:22:26 -0700 Subject: [PATCH 0227/2290] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock commit 6cdedc18ba7b9dacc36466e27e3267d201948c8d upstream. The following 3 locks would race against each other, causing the deadlock situation in the Syzbot bug report: - j1939_socks_lock - active_session_list_lock - sk_session_queue_lock A reasonable fix is to change j1939_socks_lock to an rwlock, since in the rare situations where a write lock is required for the linked list that j1939_socks_lock is protecting, the code does not attempt to acquire any more locks. This would break the circular lock dependency, where, for example, the current thread already locks j1939_socks_lock and attempts to acquire sk_session_queue_lock, and at the same time, another thread attempts to acquire j1939_socks_lock while holding sk_session_queue_lock. NOTE: This patch along does not fix the unregister_netdevice bug reported by Syzbot; instead, it solves a deadlock situation to prepare for one or more further patches to actually fix the Syzbot bug, which appears to be a reference counting problem within the j1939 codebase. Reported-by: Signed-off-by: Ziqi Zhao Reviewed-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20230721162226.8639-1-astrajoan@yahoo.com [mkl: remove unrelated newline change] Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/j1939-priv.h | 2 +- net/can/j1939/main.c | 2 +- net/can/j1939/socket.c | 24 ++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 16af1a7f80f60..74f15592d1708 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -86,7 +86,7 @@ struct j1939_priv { unsigned int tp_max_packet_size; /* lock for j1939_socks list */ - spinlock_t j1939_socks_lock; + rwlock_t j1939_socks_lock; struct list_head j1939_socks; struct kref rx_kref; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ecff1c947d683..a6fb89fa62785 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return ERR_PTR(-ENOMEM); j1939_tp_init(priv); - spin_lock_init(&priv->j1939_socks_lock); + rwlock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); mutex_lock(&j1939_netdev_lock); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b0be23559243c..a0bf2575febb7 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); } static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) { - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_del_init(&jsk->list); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; @@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) struct j1939_sock *jsk; bool match = false; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { match = j1939_sk_recv_match_one(jsk, skcb); if (match) break; } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); return match; } @@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sock *jsk; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { j1939_sk_recv_one(jsk, skb); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static void j1939_sk_sock_destruct(struct sock *sk) @@ -1080,12 +1080,12 @@ void j1939_sk_errqueue(struct j1939_session *session, } /* spread RX notifications to all sockets subscribed to this session */ - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) @@ -1273,7 +1273,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) struct j1939_sock *jsk; int error_code = ENETDOWN; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) @@ -1281,7 +1281,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) j1939_sk_queue_drop_all(priv, jsk, error_code); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, -- GitLab From 4dd684d4bb3cd5454e0bf6e2a1bdfbd5c9c872ed Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 20 Oct 2023 15:38:14 +0200 Subject: [PATCH 0228/2290] can: j1939: Fix UAF in j1939_sk_match_filter during setsockopt(SO_J1939_FILTER) commit efe7cf828039aedb297c1f9920b638fffee6aabc upstream. Lock jsk->sk to prevent UAF when setsockopt(..., SO_J1939_FILTER, ...) modifies jsk->filters while receiving packets. Following trace was seen on affected system: ================================================================== BUG: KASAN: slab-use-after-free in j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] Read of size 4 at addr ffff888012144014 by task j1939/350 CPU: 0 PID: 350 Comm: j1939 Tainted: G W OE 6.5.0-rc5 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: print_report+0xd3/0x620 ? kasan_complete_mode_report_info+0x7d/0x200 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] kasan_report+0xc2/0x100 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] __asan_load4+0x84/0xb0 j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] j1939_sk_recv+0x20b/0x320 [can_j1939] ? __kasan_check_write+0x18/0x20 ? __pfx_j1939_sk_recv+0x10/0x10 [can_j1939] ? j1939_simple_recv+0x69/0x280 [can_j1939] ? j1939_ac_recv+0x5e/0x310 [can_j1939] j1939_can_recv+0x43f/0x580 [can_j1939] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] ? raw_rcv+0x42/0x3c0 [can_raw] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] can_rcv_filter+0x11f/0x350 [can] can_receive+0x12f/0x190 [can] ? __pfx_can_rcv+0x10/0x10 [can] can_rcv+0xdd/0x130 [can] ? __pfx_can_rcv+0x10/0x10 [can] __netif_receive_skb_one_core+0x13d/0x150 ? __pfx___netif_receive_skb_one_core+0x10/0x10 ? __kasan_check_write+0x18/0x20 ? _raw_spin_lock_irq+0x8c/0xe0 __netif_receive_skb+0x23/0xb0 process_backlog+0x107/0x260 __napi_poll+0x69/0x310 net_rx_action+0x2a1/0x580 ? __pfx_net_rx_action+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? handle_irq_event+0x7d/0xa0 __do_softirq+0xf3/0x3f8 do_softirq+0x53/0x80 __local_bh_enable_ip+0x6e/0x70 netif_rx+0x16b/0x180 can_send+0x32b/0x520 [can] ? __pfx_can_send+0x10/0x10 [can] ? __check_object_size+0x299/0x410 raw_sendmsg+0x572/0x6d0 [can_raw] ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] ? apparmor_socket_sendmsg+0x2f/0x40 ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] sock_sendmsg+0xef/0x100 sock_write_iter+0x162/0x220 ? __pfx_sock_write_iter+0x10/0x10 ? __rtnl_unlock+0x47/0x80 ? security_file_permission+0x54/0x320 vfs_write+0x6ba/0x750 ? __pfx_vfs_write+0x10/0x10 ? __fget_light+0x1ca/0x1f0 ? __rcu_read_unlock+0x5b/0x280 ksys_write+0x143/0x170 ? __pfx_ksys_write+0x10/0x10 ? __kasan_check_read+0x15/0x20 ? fpregs_assert_state_consistent+0x62/0x70 __x64_sys_write+0x47/0x60 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6d/0x90 ? irqentry_exit+0x3f/0x50 ? exc_page_fault+0x79/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 348: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_alloc_info+0x1f/0x30 __kasan_kmalloc+0xb5/0xc0 __kmalloc_node_track_caller+0x67/0x160 j1939_sk_setsockopt+0x284/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 349: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_free_info+0x2f/0x50 __kasan_slab_free+0x12e/0x1c0 __kmem_cache_free+0x1b9/0x380 kfree+0x7a/0x120 j1939_sk_setsockopt+0x3b2/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") Reported-by: Sili Luo Suggested-by: Sili Luo Acked-by: Oleksij Rempel Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/all/20231020133814.383996-1-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/j1939-priv.h | 1 + net/can/j1939/socket.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 74f15592d1708..31a93cae5111b 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -301,6 +301,7 @@ struct j1939_sock { int ifindex; struct j1939_addr addr; + spinlock_t filters_lock; struct j1939_filter *filters; int nfilters; pgn_t pgn_rx_filter; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index a0bf2575febb7..58909b36561a6 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + spin_lock_bh(&jsk->filters_lock); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + spin_unlock_bh(&jsk->filters_lock); return false; + +filter_match_found: + spin_unlock_bh(&jsk->filters_lock); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, @@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + spin_lock_init(&jsk->filters_lock); /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ sock_set_flag(sk, SOCK_RCU_FREE); @@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, } lock_sock(&jsk->sk); + spin_lock_bh(&jsk->filters_lock); ofilters = jsk->filters; jsk->filters = filters; jsk->nfilters = count; + spin_unlock_bh(&jsk->filters_lock); release_sock(&jsk->sk); kfree(ofilters); return 0; -- GitLab From 9359ff1a4501ea7286aec70b90f88fac66a6aea5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 27 Dec 2023 16:21:24 +0100 Subject: [PATCH 0229/2290] pmdomain: core: Move the unused cleanup to a _sync initcall commit 741ba0134fa7822fcf4e4a0a537a5c4cfd706b20 upstream. The unused clock cleanup uses the _sync initcall to give all users at earlier initcalls time to probe. Do the same to avoid leaving some PDs dangling at "on" (which actually happened on qcom!). Fixes: 2fe71dcdfd10 ("PM / domains: Add late_initcall to disable unused PM domains") Signed-off-by: Konrad Dybcio Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231227-topic-pmdomain_sync_cleanup-v1-1-5f36769d538b@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 56ceba4698024..d238b47f74c34 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1052,7 +1052,7 @@ static int __init genpd_power_off_unused(void) return 0; } -late_initcall(genpd_power_off_unused); +late_initcall_sync(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP -- GitLab From c7f9c3e94e6113021870c247826ec41d4d652ce7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:33:55 +0100 Subject: [PATCH 0230/2290] fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand() commit 60f92acb60a989b14e4b744501a0df0f82ef30a3 upstream. Patch series "fs/proc: do_task_stat: use sig->stats_". do_task_stat() has the same problem as getrusage() had before "getrusage: use sig->stats_lock rather than lock_task_sighand()": a hard lockup. If NR_CPUS threads call lock_task_sighand() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. This patch (of 3): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. Not only this removes for_each_thread() from the critical section with irqs disabled, this removes another case when stats_lock is taken with siglock held. We want to remove this dependency, then we can change the users of stats_lock to not disable irqs. Link: https://lkml.kernel.org/r/20240123153313.GA21832@redhat.com Link: https://lkml.kernel.org/r/20240123153355.GA21854@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b8103c7e..1b0d78dfd20f9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -501,7 +501,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = utime = stime = 0; + cutime = cstime = 0; cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { @@ -535,7 +535,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime_adjusted(task, &utime, &stime); gtime += sig->gtime; if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) @@ -551,10 +550,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - if (!whole) { + + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { + task_cputime_adjusted(task, &utime, &stime); min_flt = task->min_flt; maj_flt = task->maj_flt; - task_cputime_adjusted(task, &utime, &stime); gtime = task_gtime(task); } -- GitLab From 1e4432d463f38d98d40b6f0db2a1d93f2615bb3a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 14 Feb 2024 11:20:46 -0500 Subject: [PATCH 0231/2290] tracing: Inform kmemleak of saved_cmdlines allocation commit 2394ac4145ea91b92271e675a09af2a9ea6840b7 upstream. The allocation of the struct saved_cmdlines_buffer structure changed from: s = kmalloc(sizeof(*s), GFP_KERNEL); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); to: orig_size = sizeof(*s) + val * TASK_COMM_LEN; order = get_order(orig_size); size = 1 << (order + PAGE_SHIFT); page = alloc_pages(GFP_KERNEL, order); if (!page) return NULL; s = page_address(page); memset(s, 0, sizeof(*s)); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); Where that s->saved_cmdlines allocation looks to be a dangling allocation to kmemleak. That's because kmemleak only keeps track of kmalloc() allocations. For allocations that use page_alloc() directly, the kmemleak needs to be explicitly informed about it. Add kmemleak_alloc() and kmemleak_free() around the page allocation so that it doesn't give the following false positive: unreferenced object 0xffff8881010c8000 (size 32760): comm "swapper", pid 0, jiffies 4294667296 hex dump (first 32 bytes): ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ backtrace (crc ae6ec1b9): [] kmemleak_alloc+0x45/0x80 [] __kmalloc_large_node+0x10d/0x190 [] __kmalloc+0x3b1/0x4c0 [] allocate_cmdlines_buffer+0x113/0x230 [] tracer_alloc_buffers.isra.0+0x124/0x460 [] early_trace_init+0x14/0xa0 [] start_kernel+0x12e/0x3c0 [] x86_64_start_reservations+0x18/0x30 [] x86_64_start_kernel+0x7b/0x80 [] secondary_startup_64_no_verify+0x15e/0x16b Link: https://lore.kernel.org/linux-trace-kernel/87r0hfnr9r.fsf@kernel.org/ Link: https://lore.kernel.org/linux-trace-kernel/20240214112046.09a322d6@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Catalin Marinas Fixes: 44dc5c41b5b1 ("tracing: Fix wasted memory in saved_cmdlines logic") Reported-by: Kalle Valo Tested-by: Kalle Valo Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fa6c193e22f02..f667d6bdddda5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2268,6 +2269,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); kfree(s->map_cmdline_to_pid); + kmemleak_free(s); free_pages((unsigned long)s, order); } @@ -2287,6 +2289,7 @@ static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) return NULL; s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); memset(s, 0, sizeof(*s)); /* Round up to actual allocation */ -- GitLab From cf3c8916866ca47f569dc1b92c1c78a6311cb34f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Jun 2023 16:38:47 +0800 Subject: [PATCH 0232/2290] xfrm: Use xfrm_state selector for BEET input commit 842665a9008a53ff13ac22a4e4b8ae2f10e92aca upstream. For BEET the inner address and therefore family is stored in the xfrm_state selector. Use that when decapsulating an input packet instead of incorrectly relying on a non-existent tunnel protocol. Fixes: 5f24f41e8ea6 ("xfrm: Remove inner/outer modes from input path") Reported-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index deda4955c0466..c98ec262b45e9 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -331,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, { switch (x->props.mode) { case XFRM_MODE_BEET: - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - case IPPROTO_IPV6: + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); } break; -- GitLab From 0a371ed6f2c105951ca723f18dbec6bd95204962 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 4 Jul 2023 08:53:49 +0800 Subject: [PATCH 0233/2290] xfrm: Silence warnings triggerable by bad packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 57010b8ece2821a1fdfdba2197d14a022f3769db upstream. After the elimination of inner modes, a couple of warnings that were previously unreachable can now be triggered by malformed inbound packets. Fix this by: 1. Moving the setting of skb->protocol into the decap functions. 2. Returning -EINVAL when unexpected protocol is seen. Reported-by: Maciej Żenczykowski Fixes: 5f24f41e8ea6 ("xfrm: Remove inner/outer modes from input path") Signed-off-by: Herbert Xu Reviewed-by: Maciej Żenczykowski Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index c98ec262b45e9..d0320e35accbf 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int optlen = 0; int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { struct ip_beet_phdr *ph; int phlen; @@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; + skb->protocol = htons(ETH_P_IPV6); + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int size = sizeof(struct ipv6hdr); int err; + skb->protocol = htons(ETH_P_IPV6); + err = skb_cow_head(skb, size + skb->mac_len); if (err) goto out; @@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, return xfrm6_remove_tunnel_encap(x, skb); break; } + return -EINVAL; } WARN_ON_ONCE(1); @@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: - skb->protocol = htons(ETH_P_IP); - break; - case IPPROTO_IPV6: - skb->protocol = htons(ETH_P_IPV6); - break; - default: - WARN_ON_ONCE(1); - break; - } - return xfrm_inner_mode_encap_remove(x, skb); } -- GitLab From 944900fe2736c07288efe2d9394db4d3ca23f2c9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 22 Nov 2023 22:44:47 +0100 Subject: [PATCH 0234/2290] tls: fix NULL deref on tls_sw_splice_eof() with empty record commit 53f2cb491b500897a619ff6abd72f565933760f0 upstream. syzkaller discovered that if tls_sw_splice_eof() is executed as part of sendfile() when the plaintext/ciphertext sk_msg are empty, the send path gets confused because the empty ciphertext buffer does not have enough space for the encryption overhead. This causes tls_push_record() to go on the `split = true` path (which is only supposed to be used when interacting with an attached BPF program), and then get further confused and hit the tls_merge_open_record() path, which then assumes that there must be at least one populated buffer element, leading to a NULL deref. It is possible to have empty plaintext/ciphertext buffers if we previously bailed from tls_sw_sendmsg_locked() via the tls_trim_both_msgs() path. tls_sw_push_pending_record() already handles this case correctly; let's do the same check in tls_sw_splice_eof(). Fixes: df720d288dbb ("tls/sw: Use splice_eof() to flush") Cc: stable@vger.kernel.org Reported-by: syzbot+40d43509a099ea756317@syzkaller.appspotmail.com Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20231122214447.675768-1-jannh@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 09d258bb2df75..c8cbdd02a784e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1179,11 +1179,14 @@ void tls_sw_splice_eof(struct socket *sock) lock_sock(sk); retry: + /* same checks as in tls_sw_push_pending_record() */ rec = ctx->open_rec; if (!rec) goto unlock; msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; /* Check the BPF advisor and perform transmission. */ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, -- GitLab From 84df059d24680ebf93001328eda06d3c60ae2b58 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 22 Jan 2024 12:05:54 +0000 Subject: [PATCH 0235/2290] selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory [ Upstream commit d021b442cf312664811783e92b3d5e4548e92a53 ] ksm_tests was previously mmapping a region of memory, aligning the returned pointer to a PMD boundary, then setting MADV_HUGEPAGE, but was setting it past the end of the mmapped area due to not taking the pointer alignment into consideration. Fix this behaviour. Up until commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries"), this buggy behavior was (usually) masked because the alignment difference was always less than PMD-size. But since the mentioned commit, `ksm_tests -H -s 100` started failing. Link: https://lkml.kernel.org/r/20240122120554.3108022-1-ryan.roberts@arm.com Fixes: 325254899684 ("selftests: vm: add KSM huge pages merging time test") Signed-off-by: Ryan Roberts Cc: Pedro Demarchi Gomes Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/ksm_tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be2350fa3..a811659307855 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -470,7 +470,7 @@ static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t m if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); -- GitLab From 9a163479ddc45e42a1c20cb48a82d7dc11644c6f Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Fri, 19 Jan 2024 15:58:01 -0500 Subject: [PATCH 0236/2290] selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag [ Upstream commit 52e63d67b5bb423b33d7a262ac7f8bd375a90145 ] In order for the page table level 5 to be in use, the CPU must have the setting enabled in addition to the CONFIG option. Check for the flag to be set to avoid false test failures on systems that do not have this cpu flag set. The test does a series of mmap calls including three using the MAP_FIXED flag and specifying an address that is 1<<47 or 1<<48. These addresses are only available if you are using level 5 page tables, which requires both the CPU to have the capabiltiy (la57 flag) and the kernel to be configured. Currently the test only checks for the kernel configuration option, so this test can still report a false positive. Here are the three failing lines: $ ./va_high_addr_switch | grep FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(HIGH_ADDR, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED I thought (for about a second) refactoring the test so that these three mmap calls will only be run on systems with the level 5 page tables available, but the whole point of the test is to check the level 5 feature... Link: https://lkml.kernel.org/r/20240119205801.62769-1-audra@redhat.com Fixes: 4f2930c6718a ("selftests/vm: only run 128TBswitch with 5-level paging") Signed-off-by: Audra Mitchell Cc: Rafael Aquini Cc: Shuah Khan Cc: Adam Sindelar Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/va_128TBswitch.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh index 41580751dc511..231622b3a2327 100755 --- a/tools/testing/selftests/vm/va_128TBswitch.sh +++ b/tools/testing/selftests/vm/va_128TBswitch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } -- GitLab From 4bf19cef220aaff4b026143518916355806006e7 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 8 Nov 2023 10:22:15 -0800 Subject: [PATCH 0237/2290] md: bypass block throttle for superblock update [ Upstream commit d6e035aad6c09991da1c667fb83419329a3baed8 ] commit 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d") introduced a hung bug and will be reverted in next patch, since the issue that commit is fixing is due to md superblock write is throttled by wbt, to fix it, we can have superblock write bypass block layer throttle. Fixes: 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d") Cc: stable@vger.kernel.org # v5.19+ Suggested-by: Yu Kuai Signed-off-by: Junxiao Bi Reviewed-by: Logan Gunthorpe Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231108182216.73611-1-junxiao.bi@oracle.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3ccf1920682cb..c7efe15229514 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -963,9 +963,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, return; bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev, - 1, - REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA, - GFP_NOIO, &mddev->sync_set); + 1, + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META + | REQ_PREFLUSH | REQ_FUA, + GFP_NOIO, &mddev->sync_set); atomic_inc(&rdev->nr_pending); -- GitLab From 5447e64acce87bec0f686da582f1c2346555a625 Mon Sep 17 00:00:00 2001 From: Andrejs Cainikovs Date: Fri, 20 Oct 2023 17:30:22 +0200 Subject: [PATCH 0238/2290] ARM: dts: imx6q-apalis: add can power-up delay on ixora board [ Upstream commit b76bbf835d8945080b22b52fc1e6f41cde06865d ] Newer variants of Ixora boards require a power-up delay when powering up the CAN transceiver of up to 1ms. Cc: stable@vger.kernel.org Signed-off-by: Andrejs Cainikovs Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts index f9f7d99bd4db8..76f3e07bc8826 100644 --- a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts +++ b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts @@ -76,6 +76,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can1_power>; regulator-name = "can1_supply"; + startup-delay-us = <1000>; }; reg_can2_supply: regulator-can2-supply { @@ -85,6 +86,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can2_power>; regulator-name = "can2_supply"; + startup-delay-us = <1000>; }; }; -- GitLab From 48b348232070035410d8a58ff5ee18c221785cdd Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 27 Jan 2023 15:02:00 +0100 Subject: [PATCH 0239/2290] wifi: mwifiex: Support SD8978 chipset [ Upstream commit bba047f15851c8b053221f1b276eb7682d59f755 ] The Marvell SD8978 (aka NXP IW416) uses identical registers as SD8987, so reuse the existing mwifiex_reg_sd8987 definition. Note that mwifiex_reg_sd8977 and mwifiex_reg_sd8997 are likewise identical, save for the fw_dump_ctrl register: They define it as 0xf0 whereas mwifiex_reg_sd8987 defines it as 0xf9. I've verified that 0xf9 is the correct value on SD8978. NXP's out-of-tree driver uses 0xf9 for all of them, so there's a chance that 0xf0 is not correct in the mwifiex_reg_sd8977 and mwifiex_reg_sd8997 definitions. I cannot test that for lack of hardware, hence am leaving it as is. NXP has only released a firmware which runs Bluetooth over UART. Perhaps Bluetooth over SDIO is unsupported by this chipset. Consequently, only an "sdiouart" firmware image is referenced, not an alternative "sdsd" image. Signed-off-by: Lukas Wunner Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/536b4f17a72ca460ad1b07045757043fb0778988.1674827105.git.lukas@wunner.de Stable-dep-of: 1c5d463c0770 ("wifi: mwifiex: add extra delay for firmware ready") Signed-off-by: Sasha Levin --- .../bindings/net/wireless/marvell-8xxx.txt | 4 ++- drivers/net/wireless/marvell/mwifiex/Kconfig | 5 ++-- drivers/net/wireless/marvell/mwifiex/sdio.c | 25 +++++++++++++++++-- drivers/net/wireless/marvell/mwifiex/sdio.h | 1 + include/linux/mmc/sdio_ids.h | 1 + 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt index 9bf9bbac16e25..cdc303caf5f45 100644 --- a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt +++ b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt @@ -1,4 +1,4 @@ -Marvell 8787/8897/8997 (sd8787/sd8897/sd8997/pcie8997) SDIO/PCIE devices +Marvell 8787/8897/8978/8997 (sd8787/sd8897/sd8978/sd8997/pcie8997) SDIO/PCIE devices ------ This node provides properties for controlling the Marvell SDIO/PCIE wireless device. @@ -10,7 +10,9 @@ Required properties: - compatible : should be one of the following: * "marvell,sd8787" * "marvell,sd8897" + * "marvell,sd8978" * "marvell,sd8997" + * "nxp,iw416" * "pci11ab,2b42" * "pci1b4b,2b42" diff --git a/drivers/net/wireless/marvell/mwifiex/Kconfig b/drivers/net/wireless/marvell/mwifiex/Kconfig index 2b4ff2b78a7e1..b182f7155d66f 100644 --- a/drivers/net/wireless/marvell/mwifiex/Kconfig +++ b/drivers/net/wireless/marvell/mwifiex/Kconfig @@ -10,13 +10,14 @@ config MWIFIEX mwifiex. config MWIFIEX_SDIO - tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8987/SD8997" + tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8978/SD8987/SD8997" depends on MWIFIEX && MMC select FW_LOADER select WANT_DEV_COREDUMP help This adds support for wireless adapters based on Marvell - 8786/8787/8797/8887/8897/8977/8987/8997 chipsets with SDIO interface. + 8786/8787/8797/8887/8897/8977/8978/8987/8997 chipsets with + SDIO interface. SD8978 is also known as NXP IW416. If you choose to build it as a module, it will be called mwifiex_sdio. diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index ea1c1c2412e72..a24bd40dd41ab 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -263,7 +263,7 @@ static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8887 = { 0x68, 0x69, 0x6a}, }; -static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8987 = { +static const struct mwifiex_sdio_card_reg mwifiex_reg_sd89xx = { .start_rd_port = 0, .start_wr_port = 0, .base_0_reg = 0xF8, @@ -394,6 +394,22 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { .can_ext_scan = true, }; +static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { + .firmware_sdiouart = SD8978_SDIOUART_FW_NAME, + .reg = &mwifiex_reg_sd89xx, + .max_ports = 32, + .mp_agg_pkt_limit = 16, + .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, + .mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .supports_sdio_new_mode = true, + .has_control_mask = false, + .can_dump_fw = true, + .fw_dump_enh = true, + .can_auto_tdls = false, + .can_ext_scan = true, +}; + static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .firmware = SD8997_DEFAULT_FW_NAME, .firmware_sdiouart = SD8997_SDIOUART_FW_NAME, @@ -428,7 +444,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .firmware = SD8987_DEFAULT_FW_NAME, - .reg = &mwifiex_reg_sd8987, + .reg = &mwifiex_reg_sd89xx, .max_ports = 32, .mp_agg_pkt_limit = 16, .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K, @@ -482,7 +498,9 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = { { .compatible = "marvell,sd8787" }, { .compatible = "marvell,sd8897" }, + { .compatible = "marvell,sd8978" }, { .compatible = "marvell,sd8997" }, + { .compatible = "nxp,iw416" }, { } }; @@ -920,6 +938,8 @@ static const struct sdio_device_id mwifiex_ids[] = { .driver_data = (unsigned long)&mwifiex_sdio_sd8801}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8977}, + {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8978_WLAN), + .driver_data = (unsigned long)&mwifiex_sdio_sd8978}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8987}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997_WLAN), @@ -3164,6 +3184,7 @@ MODULE_FIRMWARE(SD8797_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8897_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8887_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8977_DEFAULT_FW_NAME); +MODULE_FIRMWARE(SD8978_SDIOUART_FW_NAME); MODULE_FIRMWARE(SD8987_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_SDIOUART_FW_NAME); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index 3a24bb48b2996..ae94c172310ff 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -25,6 +25,7 @@ #define SD8887_DEFAULT_FW_NAME "mrvl/sd8887_uapsta.bin" #define SD8801_DEFAULT_FW_NAME "mrvl/sd8801_uapsta.bin" #define SD8977_DEFAULT_FW_NAME "mrvl/sdsd8977_combo_v2.bin" +#define SD8978_SDIOUART_FW_NAME "mrvl/sdiouartiw416_combo_v0.bin" #define SD8987_DEFAULT_FW_NAME "mrvl/sd8987_uapsta.bin" #define SD8997_DEFAULT_FW_NAME "mrvl/sdsd8997_combo_v4.bin" #define SD8997_SDIOUART_FW_NAME "mrvl/sdiouart8997_combo_v4.bin" diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 74f9d9a6d3307..0e4ef9c5127ad 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -102,6 +102,7 @@ #define SDIO_DEVICE_ID_MARVELL_8977_BT 0x9146 #define SDIO_DEVICE_ID_MARVELL_8987_WLAN 0x9149 #define SDIO_DEVICE_ID_MARVELL_8987_BT 0x914a +#define SDIO_DEVICE_ID_MARVELL_8978_WLAN 0x9159 #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 -- GitLab From 1b7b597a69bba6b0dec27845e5935e090b7c084c Mon Sep 17 00:00:00 2001 From: David Lin Date: Sat, 9 Dec 2023 07:40:29 +0800 Subject: [PATCH 0240/2290] wifi: mwifiex: add extra delay for firmware ready [ Upstream commit 1c5d463c0770c6fa2037511a24fb17966fd07d97 ] For SDIO IW416, due to a bug, FW may return ready before complete full initialization. Command timeout may occur at driver load after reboot. Workaround by adding 100ms delay at checking FW status. Signed-off-by: David Lin Cc: stable@vger.kernel.org Reviewed-by: Francesco Dolcini Acked-by: Brian Norris Tested-by: Marcel Ziswiler # Verdin AM62 (IW416) Signed-off-by: Kalle Valo Link: https://msgid.link/20231208234029.2197-1-yu-hao.lin@nxp.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/sdio.c | 19 +++++++++++++++++++ drivers/net/wireless/marvell/mwifiex/sdio.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index a24bd40dd41ab..e55747b50dbfb 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -331,6 +331,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8786 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = false, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { @@ -346,6 +347,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { @@ -361,6 +363,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { @@ -376,6 +379,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { .can_dump_fw = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { @@ -392,6 +396,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { @@ -408,6 +413,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = true, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { @@ -425,6 +431,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { @@ -440,6 +447,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { .can_dump_fw = false, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { @@ -456,6 +464,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .fw_dump_enh = true, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { @@ -471,6 +480,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static struct memory_type_mapping generic_mem_type_map[] = { @@ -563,6 +573,7 @@ mwifiex_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id) card->fw_dump_enh = data->fw_dump_enh; card->can_auto_tdls = data->can_auto_tdls; card->can_ext_scan = data->can_ext_scan; + card->fw_ready_extra_delay = data->fw_ready_extra_delay; INIT_WORK(&card->work, mwifiex_sdio_work); } @@ -766,6 +777,7 @@ mwifiex_sdio_read_fw_status(struct mwifiex_adapter *adapter, u16 *dat) static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) { + struct sdio_mmc_card *card = adapter->card; int ret = 0; u16 firmware_stat; u32 tries; @@ -783,6 +795,13 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, ret = -1; } + if (card->fw_ready_extra_delay && + firmware_stat == FIRMWARE_READY_SDIO) + /* firmware might pretend to be ready, when it's not. + * Wait a little bit more as a workaround. + */ + msleep(100); + return ret; } diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index ae94c172310ff..a5112cb35cdcd 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -258,6 +258,7 @@ struct sdio_mmc_card { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; struct mwifiex_sdio_mpa_tx mpa_tx; struct mwifiex_sdio_mpa_rx mpa_rx; @@ -281,6 +282,7 @@ struct mwifiex_sdio_device { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; }; /* -- GitLab From 475369350157844dde3f9065591c95ec16aa64a9 Mon Sep 17 00:00:00 2001 From: Sjoerd Simons Date: Tue, 28 Nov 2023 22:35:05 +0100 Subject: [PATCH 0241/2290] bus: moxtet: Add spi device table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aaafe88d5500ba18b33be72458439367ef878788 ] The moxtet module fails to auto-load on. Add a SPI id table to allow it to do so. Signed-off-by: Sjoerd Simons Cc: Reviewed-by: Marek Behún Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- drivers/bus/moxtet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 5eb0fe73ddc45..79fc96c8d8364 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -830,6 +830,12 @@ static void moxtet_remove(struct spi_device *spi) mutex_destroy(&moxtet->lock); } +static const struct spi_device_id moxtet_spi_ids[] = { + { "moxtet" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, moxtet_spi_ids); + static const struct of_device_id moxtet_dt_ids[] = { { .compatible = "cznic,moxtet" }, {}, @@ -841,6 +847,7 @@ static struct spi_driver moxtet_spi_driver = { .name = "moxtet", .of_match_table = moxtet_dt_ids, }, + .id_table = moxtet_spi_ids, .probe = moxtet_probe, .remove = moxtet_remove, }; -- GitLab From 9c84d580de3c6f816ab43373e21e421e5687e52e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sat, 7 Jan 2023 12:09:57 +0100 Subject: [PATCH 0242/2290] arm64: dts: qcom: msm8916: Enable blsp_dma by default [ Upstream commit 0154d3594af3c198532ac7b4ab70f50fb5207a15 ] Adding the "dmas" to the I2C controllers prevents probing them if blsp_dma is disabled (infinite probe deferral). Avoid this by enabling blsp_dma by default - it's an integral part of the SoC that is almost always used (even if just for UART). Signed-off-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230107110958.5762-2-stephan@gerhold.net Stable-dep-of: 7c45b6ddbcff ("arm64: dts: qcom: msm8916: Make blsp_dma controlled-remotely") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/apq8016-sbc.dts | 4 ---- arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 - 2 files changed, 5 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts index 9d116e1fbe10c..1ac4f8c24e231 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts @@ -169,10 +169,6 @@ }; }; -&blsp_dma { - status = "okay"; -}; - &blsp_i2c2 { /* On Low speed expansion */ status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index bafac2cf7e3d6..f0d097ade84c5 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1522,7 +1522,6 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; - status = "disabled"; }; blsp1_uart1: serial@78af000 { -- GitLab From 2488e0e4bc23327af7fbd42a0a9743ca16b6081a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 4 Dec 2023 11:21:20 +0100 Subject: [PATCH 0243/2290] arm64: dts: qcom: msm8916: Make blsp_dma controlled-remotely [ Upstream commit 7c45b6ddbcff01f9934d11802010cfeb0879e693 ] The blsp_dma controller is shared between the different subsystems, which is why it is already initialized by the firmware. We should not reinitialize it from Linux to avoid potential other users of the DMA engine to misbehave. In mainline this can be described using the "qcom,controlled-remotely" property. In the downstream/vendor kernel from Qualcomm there is an opposite "qcom,managed-locally" property. This property is *not* set for the qcom,sps-dma@7884000 [1] so adding "qcom,controlled-remotely" upstream matches the behavior of the downstream/vendor kernel. Adding this seems to fix some weird issues with UART where both input/output becomes garbled with certain obscure firmware versions on some devices. [1]: https://git.codelinaro.org/clo/la/kernel/msm-3.10/-/blob/LA.BR.1.2.9.1-02310-8x16.0/arch/arm/boot/dts/qcom/msm8916.dtsi#L1466-1472 Cc: stable@vger.kernel.org # 6.5 Fixes: a0e5fb103150 ("arm64: dts: qcom: Add msm8916 BLSP device nodes") Signed-off-by: Stephan Gerhold Reviewed-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20231204-msm8916-blsp-dma-remote-v1-1-3e49c8838c8d@gerhold.net Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index f0d097ade84c5..987cebbda0571 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1522,6 +1522,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; + qcom,controlled-remotely; }; blsp1_uart1: serial@78af000 { -- GitLab From c0e41c8756eff4a1d2101b58e436237cd7e0a31b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:34:01 +0100 Subject: [PATCH 0244/2290] arm64: dts: qcom: sdm845: fix USB SS wakeup [ Upstream commit 971f5d8b0618d09db75184ddd8cca0767514db5d ] The USB SS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states. Fixes: ca4db2b538a1 ("arm64: dts: qcom: sdm845: Add USB-related nodes") Cc: stable@vger.kernel.org # 4.20 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231213173403.29544-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 4d5905ef0b411..95c515da9f2e0 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4049,7 +4049,7 @@ assigned-clock-rates = <19200000>, <150000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>, <&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>, <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", @@ -4100,7 +4100,7 @@ assigned-clock-rates = <19200000>, <150000000>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>, <&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", -- GitLab From 0a9e803549111f9fd259f4fb68fba6cd3090c6eb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:34:03 +0100 Subject: [PATCH 0245/2290] arm64: dts: qcom: sm8150: fix USB SS wakeup [ Upstream commit cc4e1da491b84ca05339a19893884cda78f74aef ] The USB SS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states. Fixes: 0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes") Fixes: b33d2868e8d3 ("arm64: dts: qcom: sm8150: Add USB and PHY device nodes") Cc: stable@vger.kernel.org # 5.10 Cc: Jack Pham Cc: Jonathan Marek Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231213173403.29544-6-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 8efd0e227d780..eb1a9369926d2 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3629,7 +3629,7 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, <&pdc 8 IRQ_TYPE_EDGE_BOTH>, <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", @@ -3678,7 +3678,7 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, <&pdc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", -- GitLab From c6feb7f41728d3a830ed244a730d932aad590db4 Mon Sep 17 00:00:00 2001 From: David Lin Date: Thu, 21 Dec 2023 09:55:11 +0800 Subject: [PATCH 0246/2290] wifi: mwifiex: fix uninitialized firmware_stat [ Upstream commit 3df95e265924ac898c1a38a0c01846dd0bd3b354 ] Variable firmware_stat is possible to be used without initialization. Signed-off-by: David Lin Fixes: 1c5d463c0770 ("wifi: mwifiex: add extra delay for firmware ready") Cc: stable@vger.kernel.org Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202312192236.ZflaWYCw-lkp@intel.com/ Acked-by: Brian Norris Signed-off-by: Kalle Valo Link: https://msgid.link/20231221015511.1032128-1-yu-hao.lin@nxp.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index e55747b50dbfb..2c9b70e9a7263 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -779,7 +779,7 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, { struct sdio_mmc_card *card = adapter->card; int ret = 0; - u16 firmware_stat; + u16 firmware_stat = 0; u32 tries; for (tries = 0; tries < poll_num; tries++) { -- GitLab From bb44477d4506e52785693a39f03cdc6a2c5e8598 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 14 Dec 2023 11:08:34 +0800 Subject: [PATCH 0247/2290] crypto: lib/mpi - Fix unexpected pointer access in mpi_ec_init [ Upstream commit ba3c5574203034781ac4231acf117da917efcd2a ] When the mpi_ec_ctx structure is initialized, some fields are not cleared, causing a crash when referencing the field when the structure was released. Initially, this issue was ignored because memory for mpi_ec_ctx is allocated with the __GFP_ZERO flag. For example, this error will be triggered when calculating the Za value for SM2 separately. Fixes: d58bb7e55a8a ("lib/mpi: Introduce ec implementation to MPI library") Cc: stable@vger.kernel.org # v6.5 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- lib/mpi/ec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index 40f5908e57a4f..e16dca1e23d52 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); -- GitLab From e7d2e87abc6fddec0e661f9fd8c1657db42d674b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 10 Jan 2024 18:29:42 +0900 Subject: [PATCH 0248/2290] block: fix partial zone append completion handling in req_bio_endio() [ Upstream commit 748dc0b65ec2b4b7b3dbd7befcc4a54fdcac7988 ] Partial completions of zone append request is not allowed but if a zone append completion indicates a number of completed bytes different from the original BIO size, only the BIO status is set to error. This leads to bio_advance() not setting the BIO size to 0 and thus to not call bio_endio() at the end of req_bio_endio(). Make sure a partially completed zone append is failed and completed immediately by forcing the completed number of bytes (nbytes) to be equal to the BIO size, thus ensuring that bio_endio() is called. Fixes: 297db731847e ("block: fix req_bio_endio append error handling") Cc: stable@kernel.vger.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240110092942.442334-1-dlemoal@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c07e5eebcbd85..7ed6b9469f979 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -747,11 +747,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. + * For such case, force the completed nbytes to be equal to + * the BIO size so that bio_advance() sets the BIO remaining + * size to 0 and we end up calling bio_endio() before returning. */ - if (bio->bi_iter.bi_size != nbytes) + if (bio->bi_iter.bi_size != nbytes) { bio->bi_status = BLK_STS_IOERR; - else + nbytes = bio->bi_iter.bi_size; + } else { bio->bi_iter.bi_sector = rq->__sector; + } } bio_advance(bio, nbytes); -- GitLab From 653bc5e6d9995d7d5f497c665b321875a626161c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 29 Jan 2024 10:57:01 +0100 Subject: [PATCH 0249/2290] netfilter: ipset: fix performance regression in swap operation commit 97f7cf1cd80eeed3b7c808b7c12463295c751001 upstream. The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/ipset/ip_set.h | 4 +++ net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++-- net/netfilter/ipset/ip_set_core.c | 37 +++++++++++++++++++------ net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++-- net/netfilter/ipset/ip_set_list_set.c | 13 +++++++-- 5 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 72f5ebc5c97a9..0b217d4ae2a48 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 26ab0e9612d82..9523104a90da4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -28,6 +28,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d47dfdcb899b0..bddd1131e825a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1184,6 +1184,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1195,8 +1203,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1208,8 +1214,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1223,6 +1231,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1230,6 +1240,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1240,10 +1253,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1396,9 +1415,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2411,8 +2427,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7499192af5866..48fda58b91a0e 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -210,6 +210,7 @@ htable_size(u8 hbits) #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -254,6 +255,7 @@ htable_size(u8 hbits) #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -437,9 +439,6 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); list_for_each_safe(l, lt, &h->ad) { list_del(l); @@ -586,6 +585,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1416,6 +1424,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 5a67f79665742..6bc7019982b05 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void -- GitLab From ebc442c6403d9b3945f9d40e582677231ab23f45 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 4 Feb 2024 16:26:42 +0100 Subject: [PATCH 0250/2290] netfilter: ipset: Missing gc cancellations fixed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 27c5a095e2518975e20a10102908ae8231699879 upstream. The patch fdb8e12cc2cc ("netfilter: ipset: fix performance regression in swap operation") missed to add the calls to gc cancellations at the error path of create operations and at module unload. Also, because the half of the destroy operations now executed by a function registered by call_rcu(), neither NFNL_SUBSYS_IPSET mutex or rcu read lock is held and therefore the checking of them results false warnings. Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation") Reported-by: syzbot+52bbc0ad036f6f0d4a25@syzkaller.appspotmail.com Reported-by: Brad Spengler Reported-by: Стас Ничипорович Tested-by: Brad Spengler Tested-by: Стас Ничипорович Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_core.c | 2 ++ net/netfilter/ipset/ip_set_hash_gen.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bddd1131e825a..f645da82d826e 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1156,6 +1156,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -2380,6 +2381,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 48fda58b91a0e..ef04e556aadb4 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -419,7 +419,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -439,7 +439,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); -- GitLab From 23027309b099ffc4efca5477009a11dccbdae592 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Jan 2024 15:29:27 +0100 Subject: [PATCH 0251/2290] parisc: Fix random data corruption from exception handler commit 8b1d72395635af45410b66cc4c4ab37a12c4a831 upstream. The current exception handler implementation, which assists when accessing user space memory, may exhibit random data corruption if the compiler decides to use a different register than the specified register %r29 (defined in ASM_EXCEPTIONTABLE_REG) for the error code. If the compiler choose another register, the fault handler will nevertheless store -EFAULT into %r29 and thus trash whatever this register is used for. Looking at the assembly I found that this happens sometimes in emulate_ldd(). To solve the issue, the easiest solution would be if it somehow is possible to tell the fault handler which register is used to hold the error code. Using %0 or %1 in the inline assembly is not posssible as it will show up as e.g. %r29 (with the "%r" prefix), which the GNU assembler can not convert to an integer. This patch takes another, better and more flexible approach: We extend the __ex_table (which is out of the execution path) by one 32-word. In this word we tell the compiler to insert the assembler instruction "or %r0,%r0,%reg", where %reg references the register which the compiler choosed for the error return code. In case of an access failure, the fault handler finds the __ex_table entry and can examine the opcode. The used register is encoded in the lowest 5 bits, and the fault handler can then store -EFAULT into this register. Since we extend the __ex_table to 3 words we can't use the BUILDTIME_TABLE_SORT config option any longer. Signed-off-by: Helge Deller Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/assembly.h | 1 + arch/parisc/include/asm/extable.h | 64 +++++++++++++++++++++++++ arch/parisc/include/asm/special_insns.h | 6 ++- arch/parisc/include/asm/uaccess.h | 48 +++---------------- arch/parisc/kernel/unaligned.c | 44 ++++++++--------- arch/parisc/mm/fault.c | 11 +++-- 7 files changed, 106 insertions(+), 69 deletions(-) create mode 100644 arch/parisc/include/asm/extable.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 345d5e021484c..abf39ecda6fb1 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -24,7 +24,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_KERNEL_BZIP2 diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759da..5937d5edaba1e 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 0000000000000..4ea23e3d79dc9 --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include +#include + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c6..51f40eaf77806 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9e..88d0ae5769dde 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include #include +#include #include #include @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index e8a4d77cff53a..8a8e7d7224a26 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -118,8 +118,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -150,8 +150,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -187,8 +187,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -207,9 +207,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -242,8 +242,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -283,8 +283,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -327,10 +327,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -356,11 +356,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index b00aa98b582c2..fbd9ada5e527e 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { -- GitLab From e4cf8941664cae2f89f0189c29fe2ce8c6be0d03 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Jan 2024 14:58:16 +1100 Subject: [PATCH 0252/2290] nfsd: fix RELEASE_LOCKOWNER [ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ] The test on so_count in nfsd4_release_lockowner() is nonsense and harmful. Revert to using check_for_locks(), changing that to not sleep. First: harmful. As is documented in the kdoc comment for nfsd4_release_lockowner(), the test on so_count can transiently return a false positive resulting in a return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is clearly a protocol violation and with the Linux NFS client it can cause incorrect behaviour. If RELEASE_LOCKOWNER is sent while some other thread is still processing a LOCK request which failed because, at the time that request was received, the given owner held a conflicting lock, then the nfsd thread processing that LOCK request can hold a reference (conflock) to the lock owner that causes nfsd4_release_lockowner() to return an incorrect error. The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so it knows that the error is impossible. It assumes the lock owner was in fact released so it feels free to use the same lock owner identifier in some later locking request. When it does reuse a lock owner identifier for which a previous RELEASE failed, it will naturally use a lock_seqid of zero. However the server, which didn't release the lock owner, will expect a larger lock_seqid and so will respond with NFS4ERR_BAD_SEQID. So clearly it is harmful to allow a false positive, which testing so_count allows. The test is nonsense because ... well... it doesn't mean anything. so_count is the sum of three different counts. 1/ the set of states listed on so_stateids 2/ the set of active vfs locks owned by any of those states 3/ various transient counts such as for conflicting locks. When it is tested against '2' it is clear that one of these is the transient reference obtained by find_lockowner_str_locked(). It is not clear what the other one is expected to be. In practice, the count is often 2 because there is precisely one state on so_stateids. If there were more, this would fail. In my testing I see two circumstances when RELEASE_LOCKOWNER is called. In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in all the lock states being removed, and so the lockowner being discarded (it is removed when there are no more references which usually happens when the lock state is discarded). When nfsd4_release_lockowner() finds that the lock owner doesn't exist, it returns success. The other case shows an so_count of '2' and precisely one state listed in so_stateid. It appears that the Linux client uses a separate lock owner for each file resulting in one lock state per lock owner, so this test on '2' is safe. For another client it might not be safe. So this patch changes check_for_locks() to use the (newish) find_any_file_locked() so that it doesn't take a reference on the nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With this check is it safe to restore the use of check_for_locks() rather than testing so_count against the mysterious '2'. Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org # v6.2+ Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index faecdbfa01a29..0443fe4e29e11 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7736,14 +7736,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = locks_inode(nf->nf_file); @@ -7759,7 +7761,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } @@ -7769,10 +7772,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * The lockowner's so_count is bumped when a lock record is added - * or when copying a conflicting lock. The latter case is brief, - * but can lead to fleeting false positives when looking for - * locks-in-use. + * Check if theree are any locks still held and if not - free the lockowner + * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found @@ -7808,10 +7809,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); return nfs_ok; } - if (atomic_read(&lo->lo_owner.so_count) != 2) { - spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); - return nfserr_locks_held; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; + } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { -- GitLab From 51a8f31b939c21994f43e2d01e1a97719c8685df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Feb 2024 13:22:39 +1100 Subject: [PATCH 0253/2290] nfsd: don't take fi_lock in nfsd_break_deleg_cb() [ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ] A recent change to check_for_locks() changed it to take ->flc_lock while holding ->fi_lock. This creates a lock inversion (reported by lockdep) because there is a case where ->fi_lock is taken while holding ->flc_lock. ->flc_lock is held across ->fl_lmops callbacks, and nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However it doesn't need to. Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list and so needed the lock. Since then it doesn't walk the list and doesn't need the lock. Two actions are performed under the lock. One is to call nfsd_break_one_deleg which calls nfsd4_run_cb(). These doesn't act on the nfs4_file at all, so don't need the lock. The other is to set ->fi_had_conflict which is in the nfs4_file. This field is only ever set here (except when initialised to false) so there is no possible problem will multiple threads racing when setting it. The field is tested twice in nfs4_set_delegation(). The first test does not hold a lock and is documented as an opportunistic optimisation, so it doesn't impose any need to hold ->fi_lock while setting ->fi_had_conflict. The second test in nfs4_set_delegation() *is* make under ->fi_lock, so removing the locking when ->fi_had_conflict is set could make a change. The change could only be interesting if ->fi_had_conflict tested as false even though nfsd_break_one_deleg() ran before ->fi_lock was unlocked. i.e. while hash_delegation_locked() was running. As hash_delegation_lock() doesn't interact in any way with nfs4_run_cb() there can be no importance to this interaction. So this patch removes the locking from nfsd_break_one_deleg() and moves the final test on ->fi_had_conflict out of the locked region to make it clear that locking isn't important to the test. It is still tested *after* vfs_setlease() has succeeded. This might be significant and as vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called under ->flc_lock this "after" is a true ordering provided by a spinlock. Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0443fe4e29e11..b3f6dda930d8b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4908,10 +4908,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); return false; } @@ -5499,12 +5497,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); -- GitLab From e1c1bdaa387976cb577769a58abce5ef3e2bc88d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Jan 2023 09:32:06 -0800 Subject: [PATCH 0254/2290] hrtimer: Ignore slack time for RT tasks in schedule_hrtimeout_range() commit 0c52310f260014d95c1310364379772cb74cf82d upstream. While in theory the timer can be triggered before expires + delta, for the cases of RT tasks they really have no business giving any lenience for extra slack time, so override any passed value by the user and always use zero for schedule_hrtimeout_range() calls. Furthermore, this is similar to what the nanosleep(2) family already does with current->timer_slack_ns. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230123173206.6764-3-dave@stgolabs.net Signed-off-by: Felix Moessbauer Signed-off-by: Greg Kroah-Hartman --- kernel/time/hrtimer.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e0aff1d1ea4f..9bb88836c42e6 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2266,7 +2266,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2293,6 +2293,13 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } + /* + * Override any slack passed by the user if under + * rt contraints. + */ + if (rt_task(current)) + delta = 0; + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2312,7 +2319,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * * Make the current task sleep until the given expiry time has @@ -2320,7 +2327,8 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. + * actual wakeup to a time that is both power and performance friendly + * for regular (non RT/DL) tasks. * The kernel give the normal best effort behavior for "@expires+@delta", * but may decide to fire the timer earlier, but no earlier than @expires. * -- GitLab From 270325fb3154721f944fb9e1eca8fffc78d65f28 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:42 -0600 Subject: [PATCH 0255/2290] RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned commit 0a5ec366de7e94192669ba08de6ed336607fd282 upstream. The SQ is shared for between kernel and used by storing the kernel page pointer and passing that to a kmap_atomic(). This then requires that the alignment is PAGE_SIZE aligned. Fix by adding an iWarp specific alignment check. Fixes: e965ef0e7b2c ("RDMA/irdma: Split QP handler into irdma_reg_user_mr_type_qp") Link: https://lore.kernel.org/r/20231129202143.1434-3-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/irdma/verbs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 4859b99d54fc2..01faec6ea5285 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2845,6 +2845,13 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, switch (req.reg_type) { case IRDMA_MEMREG_TYPE_QP: + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) { + err = -EINVAL; + goto error; + } + total = req.sq_pages + req.rq_pages + shadow_pgcnt; if (total > iwmr->page_cnt) { err = -EINVAL; -- GitLab From 1ae3c59355dc9882e09c020afe8ffbd895ad0f29 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:41 -0300 Subject: [PATCH 0256/2290] smb: client: fix potential OOBs in smb2_parse_contexts() commit af1689a9b7701d9907dfc84d2a4b57c4bc907144 upstream. Validate offsets and lengths before dereferencing create contexts in smb2_parse_contexts(). This fixes following oops when accessing invalid create contexts from server: BUG: unable to handle page fault for address: ffff8881178d8cc3 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 4a01067 P4D 4a01067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1736 Comm: mount.cifs Not tainted 6.7.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:smb2_parse_contexts+0xa0/0x3a0 [cifs] Code: f8 10 75 13 48 b8 93 ad 25 50 9c b4 11 e7 49 39 06 0f 84 d2 00 00 00 8b 45 00 85 c0 74 61 41 29 c5 48 01 c5 41 83 fd 0f 76 55 <0f> b7 7d 04 0f b7 45 06 4c 8d 74 3d 00 66 83 f8 04 75 bc ba 04 00 RSP: 0018:ffffc900007939e0 EFLAGS: 00010216 RAX: ffffc90000793c78 RBX: ffff8880180cc000 RCX: ffffc90000793c90 RDX: ffffc90000793cc0 RSI: ffff8880178d8cc0 RDI: ffff8880180cc000 RBP: ffff8881178d8cbf R08: ffffc90000793c22 R09: 0000000000000000 R10: ffff8880180cc000 R11: 0000000000000024 R12: 0000000000000000 R13: 0000000000000020 R14: 0000000000000000 R15: ffffc90000793c22 FS: 00007f873753cbc0(0000) GS:ffff88806bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8881178d8cc3 CR3: 00000000181ca000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? search_module_extables+0x19/0x60 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x1b6/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? smb2_parse_contexts+0xa0/0x3a0 [cifs] SMB2_open+0x38d/0x5f0 [cifs] ? smb2_is_path_accessible+0x138/0x260 [cifs] smb2_is_path_accessible+0x138/0x260 [cifs] cifs_is_path_remote+0x8d/0x230 [cifs] cifs_mount+0x7e/0x350 [cifs] cifs_smb3_do_mount+0x128/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f8737657b1e Reported-by: Robert Morris Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French [Guru: Modified the patch to be applicable to the cached_dir.c file.] Signed-off-by: Guruswamy Basavaiah Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 8 ++-- fs/smb/client/smb2pdu.c | 93 +++++++++++++++++++++++--------------- fs/smb/client/smb2proto.h | 12 +++-- 3 files changed, 68 insertions(+), 45 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5a132c1e6f6c4..6f4d7aa70e5a2 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -268,10 +268,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) goto oshr_free; - smb2_parse_contexts(server, o_rsp, + rc = smb2_parse_contexts(server, rsp_iov, &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) + goto oshr_free; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) goto oshr_free; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e65f998ea4cfc..d610862ac6a05 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2145,17 +2145,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2164,45 +2165,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; + + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen >= doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } + break; + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ - - next = le32_to_cpu(cc->Next); - if (!next) + + off = le32_to_cpu(cc->Next); + if (!off) break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3082,8 +3101,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index be21b5d26f67e..b325fde010adc 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -249,11 +249,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); -- GitLab From 380aeff204b903502582019ff067caccbd3399b3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Jan 2024 01:08:26 -0300 Subject: [PATCH 0257/2290] smb: client: fix parsing of SMB3.1.1 POSIX create context commit 76025cc2285d9ede3d717fe4305d66f8be2d9346 upstream. The data offset for the SMB3.1.1 POSIX create context will always be 8-byte aligned so having the check 'noff + nlen >= doff' in smb2_parse_contexts() is wrong as it will lead to -EINVAL because noff + nlen == doff. Fix the sanity check to correctly handle aligned create context data. Fixes: af1689a9b770 ("smb: client: fix potential OOBs in smb2_parse_contexts()") Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Guruswamy Basavaiah Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index d610862ac6a05..c1fc1651d8b69 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2184,7 +2184,7 @@ int smb2_parse_contexts(struct TCP_Server_Info *server, noff = le16_to_cpu(cc->NameOffset); nlen = le16_to_cpu(cc->NameLength); - if (noff + nlen >= doff) + if (noff + nlen > doff) return -EINVAL; name = (char *)cc + noff; -- GitLab From 989b0ff35fe5fc9652ee5bafbe8483db6f27b137 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2023 16:46:21 +0000 Subject: [PATCH 0258/2290] net: prevent mss overflow in skb_segment() commit 23d05d563b7e7b0314e65c8e882bc27eac2da8e7 upstream. Once again syzbot is able to crash the kernel in skb_segment() [1] GSO_BY_FRAGS is a forbidden value, but unfortunately the following computation in skb_segment() can reach it quite easily : mss = mss * partial_segs; 65535 = 3 * 5 * 17 * 257, so many initial values of mss can lead to a bad final result. Make sure to limit segmentation so that the new mss value is smaller than GSO_BY_FRAGS. [1] general protection fault, probably for non-canonical address 0xdffffc000000000e: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] CPU: 1 PID: 5079 Comm: syz-executor993 Not tainted 6.7.0-rc4-syzkaller-00141-g1ae4cd3cbdd0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:skb_segment+0x181d/0x3f30 net/core/skbuff.c:4551 Code: 83 e3 02 e9 fb ed ff ff e8 90 68 1c f9 48 8b 84 24 f8 00 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 8a 21 00 00 48 8b 84 24 f8 00 RSP: 0018:ffffc900043473d0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000010046 RCX: ffffffff886b1597 RDX: 000000000000000e RSI: ffffffff886b2520 RDI: 0000000000000070 RBP: ffffc90004347578 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff888063202ac0 R13: 0000000000010000 R14: 000000000000ffff R15: 0000000000000046 FS: 0000555556e7e380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 0000000027ee2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: udp6_ufo_fragment+0xa0e/0xd00 net/ipv6/udp_offload.c:109 ipv6_gso_segment+0x534/0x17e0 net/ipv6/ip6_offload.c:120 skb_mac_gso_segment+0x290/0x610 net/core/gso.c:53 __skb_gso_segment+0x339/0x710 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x36c/0xeb0 net/core/dev.c:3626 __dev_queue_xmit+0x6f3/0x3d60 net/core/dev.c:4338 dev_queue_xmit include/linux/netdevice.h:3134 [inline] packet_xmit+0x257/0x380 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x24c6/0x5220 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f8692032aa9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 d1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff8d685418 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f8692032aa9 RDX: 0000000000010048 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 00000000000f4240 R08: 0000000020000540 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff8d685480 R13: 0000000000000001 R14: 00007fff8d685480 R15: 0000000000000003 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:skb_segment+0x181d/0x3f30 net/core/skbuff.c:4551 Code: 83 e3 02 e9 fb ed ff ff e8 90 68 1c f9 48 8b 84 24 f8 00 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 8a 21 00 00 48 8b 84 24 f8 00 RSP: 0018:ffffc900043473d0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000010046 RCX: ffffffff886b1597 RDX: 000000000000000e RSI: ffffffff886b2520 RDI: 0000000000000070 RBP: ffffc90004347578 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff888063202ac0 R13: 0000000000010000 R14: 000000000000ffff R15: 0000000000000046 FS: 0000555556e7e380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 0000000027ee2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231212164621.4131800-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a819d0a7bfb0..d4bd10f8723df 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4213,8 +4213,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. + * Cap len to not accidentally hit GSO_BY_FRAGS. */ - partial_segs = len / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss; if (partial_segs > 1) mss *= partial_segs; else -- GitLab From f7bbad9561f32dda2c13f6c4d0ca77d301f1c123 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:28 +0100 Subject: [PATCH 0259/2290] bpf: Add struct for bin_args arg in bpf_bprintf_prepare commit 78aa1cc9404399a15d2a1205329c6a06236f5378 upstream. Adding struct bpf_bprintf_data to hold bin_args argument for bpf_bprintf_prepare function. We will add another return argument to bpf_bprintf_prepare and pass the struct to bpf_bprintf_cleanup for proper cleanup in following changes. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-2-jolsa@kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 7 ++++++- kernel/bpf/helpers.c | 24 +++++++++++++----------- kernel/bpf/verifier.c | 3 ++- kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++-------------- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c04a61ffac8ae..0e4fb16cd5c1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2740,8 +2740,13 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +struct bpf_bprintf_data { + u32 *bin_args; + bool get_bin_args; +}; + int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); + u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(void); /* the implementation of the opaque uapi struct bpf_dynptr */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 34135fbd6097e..42141d59e9c67 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -795,16 +795,16 @@ void bpf_bprintf_cleanup(void) * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: - * - Format string verification only: when bin_args is NULL + * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in - * bin_args a binary representation of arguments usable by bstr_printf where - * pointers from BPF have been sanitized. + * data->bin_args a binary representation of arguments usable by bstr_printf + * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. */ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_args, u32 num_args) + u32 num_args, struct bpf_bprintf_data *data) { char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; size_t sizeof_cur_arg, sizeof_cur_ip; @@ -817,12 +817,12 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (bin_args) { + if (data->get_bin_args) { if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) return -EBUSY; tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; - *bin_args = (u32 *)tmp_buf; + data->bin_args = (u32 *)tmp_buf; } for (i = 0; i < fmt_size; i++) { @@ -1023,24 +1023,26 @@ out: } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; - err = bstr_printf(str, str_size, fmt, bin_args); + err = bstr_printf(str, str_size, fmt, data.bin_args); bpf_bprintf_cleanup(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 23b6d57b5eef2..1a29ac4db6eae 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7448,6 +7448,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; struct bpf_map *fmt_map = fmt_reg->map_ptr; + struct bpf_bprintf_data data = {}; int err, fmt_map_off, num_args; u64 fmt_addr; char *fmt; @@ -7472,7 +7473,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); if (err < 0) verbose(env, "Invalid format string\n"); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f4a494a457c52..bbf44095999f6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -377,18 +377,20 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; - u32 *bin_args; + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, - MAX_TRACE_PRINTK_VARARGS); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, + MAX_TRACE_PRINTK_VARARGS, &data); if (ret < 0) return ret; raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); @@ -426,25 +428,27 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; int ret, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (ret < 0) return ret; raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); @@ -471,21 +475,23 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) } BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (err < 0) return err; - seq_bprintf(m, fmt, bin_args); + seq_bprintf(m, fmt, data.bin_args); bpf_bprintf_cleanup(); -- GitLab From 95b7476f6f68d725c474e3348e89436b0abde62a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:29 +0100 Subject: [PATCH 0260/2290] bpf: Do cleanup in bpf_bprintf_cleanup only when needed commit f19a4050455aad847fb93f18dc1fe502eb60f989 upstream. Currently we always cleanup/decrement bpf_bprintf_nest_level variable in bpf_bprintf_cleanup if it's > 0. There's possible scenario where this could cause a problem, when bpf_bprintf_prepare does not get bin_args buffer (because num_args is 0) and following bpf_bprintf_cleanup call decrements bpf_bprintf_nest_level variable, like: in task context: bpf_bprintf_prepare(num_args != 0) increments 'bpf_bprintf_nest_level = 1' -> first irq : bpf_bprintf_prepare(num_args == 0) bpf_bprintf_cleanup decrements 'bpf_bprintf_nest_level = 0' -> second irq: bpf_bprintf_prepare(num_args != 0) bpf_bprintf_nest_level = 1 gets same buffer as task context above Adding check to bpf_bprintf_cleanup and doing the real cleanup only if we got bin_args data in the first place. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-3-jolsa@kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 2 +- kernel/bpf/helpers.c | 16 +++++++++------- kernel/trace/bpf_trace.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0e4fb16cd5c1f..bc4e6969899af 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2747,7 +2747,7 @@ struct bpf_bprintf_data { int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); -void bpf_bprintf_cleanup(void); +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 42141d59e9c67..64f41f58b37b7 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -781,12 +781,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) return 0; } -void bpf_bprintf_cleanup(void) +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (this_cpu_read(bpf_bprintf_nest_level)) { - this_cpu_dec(bpf_bprintf_nest_level); - preempt_enable(); - } + if (!data->bin_args) + return; + if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) + return; + this_cpu_dec(bpf_bprintf_nest_level); + preempt_enable(); } /* @@ -1018,7 +1020,7 @@ nocopy_fmt: err = 0; out: if (err) - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(data); return err; } @@ -1044,7 +1046,7 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, err = bstr_printf(str, str_size, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return err + 1; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bbf44095999f6..263a54d0d9eec 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -395,7 +395,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -453,7 +453,7 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -493,7 +493,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, seq_bprintf(m, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return seq_has_overflowed(m) ? -EOVERFLOW : 0; } -- GitLab From f3e975828636794a9d4cc27adb14a2f66592d414 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:30 +0100 Subject: [PATCH 0261/2290] bpf: Remove trace_printk_lock commit e2bb9e01d589f7fa82573aedd2765ff9b277816a upstream. Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer guarded with trace_printk_lock spin lock. The spin lock contention causes issues with bpf programs attached to contention_begin tracepoint [1][2]. Andrii suggested we could get rid of the contention by using trylock, but we could actually get rid of the spinlock completely by using percpu buffers the same way as for bin_args in bpf_bprintf_prepare function. Adding new return 'buf' argument to struct bpf_bprintf_data and making bpf_bprintf_prepare to return also the buffer for printk helpers. [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/ Reported-by: Hao Sun Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-4-jolsa@kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 3 +++ kernel/bpf/helpers.c | 31 +++++++++++++++++++------------ kernel/trace/bpf_trace.c | 20 ++++++-------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bc4e6969899af..1ca1902af23e9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2739,10 +2739,13 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_BPRINTF_BUF 1024 struct bpf_bprintf_data { u32 *bin_args; + char *buf; bool get_bin_args; + bool get_buf; }; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 64f41f58b37b7..6a61a98d602cd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -753,19 +753,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary * arguments representation. */ -#define MAX_BPRINTF_BUF_LEN 512 +#define MAX_BPRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; }; -static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); + +static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_fmt_tmp_buf(char **tmp_buf) +static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { - struct bpf_bprintf_buffers *bufs; int nest_level; preempt_disable(); @@ -775,15 +776,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_bprintf_bufs); - *tmp_buf = bufs->tmp_bufs[nest_level - 1]; + *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (!data->bin_args) + if (!data->bin_args && !data->buf) return; if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) return; @@ -808,7 +808,9 @@ void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data) { + bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; + struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; @@ -819,14 +821,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (data->get_bin_args) { - if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) - return -EBUSY; + if (get_buffers && try_get_buffers(&buffers)) + return -EBUSY; - tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; + if (data->get_bin_args) { + if (num_args) + tmp_buf = buffers->bin_args; + tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; data->bin_args = (u32 *)tmp_buf; } + if (data->get_buf) + data->buf = buffers->buf; + for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 263a54d0d9eec..3fdde232eaa92 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -368,8 +368,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -379,9 +377,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; struct bpf_bprintf_data data = { .get_bin_args = true, + .get_buf = true, }; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; int ret; ret = bpf_bprintf_prepare(fmt, fmt_size, args, @@ -389,11 +386,9 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); bpf_bprintf_cleanup(&data); @@ -433,9 +428,8 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, { struct bpf_bprintf_data data = { .get_bin_args = true, + .get_buf = true, }; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; int ret, num_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || @@ -447,11 +441,9 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); bpf_bprintf_cleanup(&data); -- GitLab From a160c3293a1cce15d5bb1e5886480d7d416b7353 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Wed, 17 Jan 2024 14:37:29 -0800 Subject: [PATCH 0262/2290] userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb commit 67695f18d55924b2013534ef3bdc363bc9e14605 upstream. In mfill_atomic_hugetlb(), mmap_changing isn't being checked again if we drop mmap_lock and reacquire it. When the lock is not held, mmap_changing could have been incremented. This is also inconsistent with the behavior in mfill_atomic(). Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Signed-off-by: Lokesh Gidra Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Brian Geffon Cc: David Hildenbrand Cc: Jann Horn Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 650ab6cfd5f49..992a0a16846f7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -327,6 +327,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy) { @@ -445,6 +446,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -480,6 +490,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy); #endif /* CONFIG_HUGETLB_PAGE */ @@ -601,8 +612,8 @@ retry: */ if (is_vm_hugetlb_page(dst_vma)) return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, - src_start, len, mcopy_mode, - wp_copy); + src_start, len, mmap_changing, + mcopy_mode, wp_copy); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; -- GitLab From 43ec3c888653404c492749b971e56782ca4e574b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Sep 2022 11:32:41 +0200 Subject: [PATCH 0263/2290] dmaengine: ioat: Free up __cleanup() name commit f62141ac730d6fe73a05750cb4482aabb681cfb9 upstream. In order to use __cleanup for __attribute__((__cleanup__(func))) the name must not be used for anything else. Avoid the conflict. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dave Jiang Link: https://lkml.kernel.org/r/20230612093537.467120754%40infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/dma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6cad28..0b846c605d4bd 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -584,11 +584,11 @@ desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) } /** - * __cleanup - reclaim used descriptors + * __ioat_cleanup - reclaim used descriptors * @ioat_chan: channel (ring) to clean * @phys_complete: zeroed (or not) completion address (from status) */ -static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) +static void __ioat_cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; struct ioat_ring_ent *desc; @@ -675,7 +675,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); if (is_ioat_halted(*ioat_chan->completion)) { u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); @@ -712,7 +712,7 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) ioat_quiesce(ioat_chan, 0); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); __ioat_restart_chan(ioat_chan); } @@ -786,7 +786,7 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan) /* cleanup so tail points to descriptor that caused the error */ if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); @@ -943,7 +943,7 @@ void ioat_timer_event(struct timer_list *t) /* timer restarted in ioat_cleanup_preamble * and IOAT_COMPLETION_ACK cleared */ - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); goto unlock_out; } -- GitLab From 579cfab21b59fbf4bba2a564c5810ad72e7f868a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 09:48:59 +0200 Subject: [PATCH 0264/2290] apparmor: Free up __cleanup() name commit 9a1f37ebcfe061721564042254719dc8fd5c9fa0 upstream. In order to use __cleanup for __attribute__((__cleanup__(func))) the name must not be used for anything else. Avoid the conflict. Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Johansen Link: https://lkml.kernel.org/r/20230612093537.536441207%40infradead.org Signed-off-by: Greg Kroah-Hartman --- security/apparmor/include/lib.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f42359f58eb58..d468c8b90298d 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -226,7 +226,7 @@ void aa_policy_destroy(struct aa_policy *policy); */ #define fn_label_build(L, P, GFP, FN) \ ({ \ - __label__ __cleanup, __done; \ + __label__ __do_cleanup, __done; \ struct aa_label *__new_; \ \ if ((L)->size > 1) { \ @@ -244,7 +244,7 @@ void aa_policy_destroy(struct aa_policy *policy); __new_ = (FN); \ AA_BUG(!__new_); \ if (IS_ERR(__new_)) \ - goto __cleanup; \ + goto __do_cleanup; \ __lvec[__j++] = __new_; \ } \ for (__j = __count = 0; __j < (L)->size; __j++) \ @@ -266,7 +266,7 @@ void aa_policy_destroy(struct aa_policy *policy); vec_cleanup(profile, __pvec, __count); \ } else \ __new_ = NULL; \ -__cleanup: \ +__do_cleanup: \ vec_cleanup(label, __lvec, (L)->size); \ } else { \ (P) = labels_profile(L); \ -- GitLab From 3c6cc62ce1265aa5623e2e1b29c0fe258bf6e232 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 May 2023 12:23:48 +0200 Subject: [PATCH 0265/2290] locking: Introduce __cleanup() based infrastructure commit 54da6a0924311c7cf5015533991e44fb8eb12773 upstream. Use __attribute__((__cleanup__(func))) to build: - simple auto-release pointers using __free() - 'classes' with constructor and destructor semantics for scope-based resource management. - lock guards based on the above classes. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230612093537.614161713%40infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/cleanup.h | 171 ++++++++++++++++++++++++++++ include/linux/compiler-clang.h | 9 ++ include/linux/compiler_attributes.h | 6 + include/linux/device.h | 7 ++ include/linux/file.h | 6 + include/linux/irqflags.h | 7 ++ include/linux/mutex.h | 4 + include/linux/percpu.h | 4 + include/linux/preempt.h | 5 + include/linux/rcupdate.h | 3 + include/linux/rwsem.h | 8 ++ include/linux/sched/task.h | 2 + include/linux/slab.h | 3 + include/linux/spinlock.h | 31 +++++ include/linux/srcu.h | 5 + scripts/checkpatch.pl | 2 +- 16 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 include/linux/cleanup.h diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h new file mode 100644 index 0000000000000..53f1a7a932b08 --- /dev/null +++ b/include/linux/cleanup.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GUARDS_H +#define __LINUX_GUARDS_H + +#include + +/* + * DEFINE_FREE(name, type, free): + * simple helper macro that defines the required wrapper for a __free() + * based cleanup function. @free is an expression using '_T' to access + * the variable. + * + * __free(name): + * variable attribute to add a scoped based cleanup to the variable. + * + * no_free_ptr(var): + * like a non-atomic xchg(var, NULL), such that the cleanup function will + * be inhibited -- provided it sanely deals with a NULL value. + * + * return_ptr(p): + * returns p while inhibiting the __free(). + * + * Ex. + * + * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + * + * struct obj *p __free(kfree) = kmalloc(...); + * if (!p) + * return NULL; + * + * if (!init_obj(p)) + * return NULL; + * + * return_ptr(p); + */ + +#define DEFINE_FREE(_name, _type, _free) \ + static inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; } + +#define __free(_name) __cleanup(__free_##_name) + +#define no_free_ptr(p) \ + ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + +#define return_ptr(p) return no_free_ptr(p) + + +/* + * DEFINE_CLASS(name, type, exit, init, init_args...): + * helper to define the destructor and constructor for a type. + * @exit is an expression using '_T' -- similar to FREE above. + * @init is an expression in @init_args resulting in @type + * + * EXTEND_CLASS(name, ext, init, init_args...): + * extends class @name to @name@ext with the new constructor + * + * CLASS(name, var)(args...): + * declare the variable @var as an instance of the named class + * + * Ex. + * + * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) + * + * CLASS(fdget, f)(fd); + * if (!f.file) + * return -EBADF; + * + * // use 'f' without concern + */ + +#define DEFINE_CLASS(_name, _type, _exit, _init, _init_args...) \ +typedef _type class_##_name##_t; \ +static inline void class_##_name##_destructor(_type *p) \ +{ _type _T = *p; _exit; } \ +static inline _type class_##_name##_constructor(_init_args) \ +{ _type t = _init; return t; } + +#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ +typedef class_##_name##_t class_##_name##ext##_t; \ +static inline void class_##_name##ext##_destructor(class_##_name##_t *p)\ +{ class_##_name##_destructor(p); } \ +static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ +{ class_##_name##_t t = _init; return t; } + +#define CLASS(_name, var) \ + class_##_name##_t var __cleanup(class_##_name##_destructor) = \ + class_##_name##_constructor + + +/* + * DEFINE_GUARD(name, type, lock, unlock): + * trivial wrapper around DEFINE_CLASS() above specifically + * for locks. + * + * guard(name): + * an anonymous instance of the (guard) class + * + * scoped_guard (name, args...) { }: + * similar to CLASS(name, scope)(args), except the variable (with the + * explicit name 'scope') is declard in a for-loop such that its scope is + * bound to the next (compound) statement. + * + */ + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + +#define guard(_name) \ + CLASS(_name, __UNIQUE_ID(guard)) + +#define scoped_guard(_name, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) + +/* + * Additional helper macros for generating lock guards with types, either for + * locks that don't have a native type (eg. RCU, preempt) or those that need a + * 'fat' pointer (eg. spin_lock_irqsave). + * + * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * + * will result in the following type: + * + * typedef struct { + * type *lock; // 'type := void' for the _0 variant + * __VA_ARGS__; + * } class_##name##_t; + * + * As above, both _lock and _unlock are statements, except this time '_T' will + * be a pointer to the above struct. + */ + +#define __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, ...) \ +typedef struct { \ + _type *lock; \ + __VA_ARGS__; \ +} class_##_name##_t; \ + \ +static inline void class_##_name##_destructor(class_##_name##_t *_T) \ +{ \ + if (_T->lock) { _unlock; } \ +} + + +#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(_type *l) \ +{ \ + class_##_name##_t _t = { .lock = l }, *_T = &_t; \ + _lock; \ + return _t; \ +} + +#define __DEFINE_LOCK_GUARD_0(_name, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(void) \ +{ \ + class_##_name##_t _t = { .lock = (void*)1 }, \ + *_T __maybe_unused = &_t; \ + _lock; \ + return _t; \ +} + +#define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_1(_name, _type, _lock) + +#define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_0(_name, _lock) + +#endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 6cfd6902bd5b9..9b673fefcef8a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -5,6 +5,15 @@ /* Compiler specific definitions for Clang compiler */ +/* + * Clang prior to 17 is being silly and considers many __cleanup() variables + * as unused (because they are, their sole purpose is to go out of scope). + * + * https://reviews.llvm.org/D152180 + */ +#undef __cleanup +#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) + /* same as gcc, this was present in clang-2.6 so we can assume it works * with any version that can compile the kernel */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a0..ae4c9579ca5f0 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -75,6 +75,12 @@ # define __assume_aligned(a, ...) #endif +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-cleanup-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#cleanup + */ +#define __cleanup(func) __attribute__((__cleanup__(func))) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute diff --git a/include/linux/device.h b/include/linux/device.h index 7cf24330d6814..5520bb546a4ac 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -30,6 +30,7 @@ #include #include #include +#include #include struct device; @@ -898,6 +899,9 @@ void device_unregister(struct device *dev); void device_initialize(struct device *dev); int __must_check device_add(struct device *dev); void device_del(struct device *dev); + +DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) + int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); int device_for_each_child_reverse(struct device *dev, void *data, @@ -1071,6 +1075,9 @@ extern int (*platform_notify_remove)(struct device *dev); */ struct device *get_device(struct device *dev); void put_device(struct device *dev); + +DEFINE_FREE(put_device, struct device *, if (_T) put_device(_T)) + bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS diff --git a/include/linux/file.h b/include/linux/file.h index 39704eae83e27..6e9099d293436 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -10,6 +10,7 @@ #include #include #include +#include struct file; @@ -80,6 +81,8 @@ static inline void fdput_pos(struct fd f) fdput(f); } +DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); @@ -88,6 +91,9 @@ extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); +DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), + get_unused_fd_flags(flags), unsigned flags) + extern void fd_install(unsigned int fd, struct file *file); extern int __receive_fd(struct file *file, int __user *ufd, diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5ec0fa71399e4..2b665c32f5fe6 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -13,6 +13,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include +#include #include #include @@ -267,4 +268,10 @@ extern void warn_bogus_irq_restore(void); #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +DEFINE_LOCK_GUARD_0(irq, local_irq_disable(), local_irq_enable()) +DEFINE_LOCK_GUARD_0(irqsave, + local_irq_save(_T->flags), + local_irq_restore(_T->flags), + unsigned long flags) + #endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8f226d460f51c..a33aa9eb9fc3b 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -219,4 +220,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) +DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad1351cc..ba00a49369cae 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); + +DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) + extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #define alloc_percpu_gfp(type, gfp) \ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8cfcc5d454512..9aa6358a1a16b 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -8,6 +8,7 @@ */ #include +#include #include /* @@ -474,4 +475,8 @@ static __always_inline void preempt_enable_nested(void) preempt_enable(); } +DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) +DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 46bd9a331fd5d..d2507168b9c7b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1077,4 +1078,6 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) extern int rcu_expedited; extern int rcu_normal; +DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index efa5c324369a2..1dd530ce8b45b 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ @@ -201,6 +202,13 @@ extern void up_read(struct rw_semaphore *sem); */ extern void up_write(struct rw_semaphore *sem); +DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) + +DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) +DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) + + /* * downgrade write lock to read lock */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 7291fb6399d2a..aaa25ed1a8fe0 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -145,6 +145,8 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) + static inline void put_task_struct_many(struct task_struct *t, int nr) { if (refcount_sub_and_test(nr, &t->usage)) diff --git a/include/linux/slab.h b/include/linux/slab.h index 45efc6c553b82..cb4b5deca9a9c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -17,6 +17,7 @@ #include #include #include +#include /* @@ -197,6 +198,8 @@ void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); +DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + /** * ksize - Report actual allocation size of associated object * diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1341f7d62da44..83377540c369a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -493,5 +494,35 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, + raw_spin_lock(_T->lock), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, + raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, + raw_spin_lock_irq(_T->lock), + raw_spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, + raw_spin_lock_irqsave(_T->lock, _T->flags), + raw_spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, + spin_lock(_T->lock), + spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, + spin_lock_irq(_T->lock), + spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, + spin_lock_irqsave(_T->lock, _T->flags), + spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a0..f9e1fa7ff86fc 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -212,4 +212,9 @@ static inline void smp_mb__after_srcu_read_unlock(void) /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } +DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, + _T->idx = srcu_read_lock(_T->lock), + srcu_read_unlock(_T->lock, _T->idx), + int idx) + #endif diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1e5e66ae5a522..ecf4250b0d2d2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4971,7 +4971,7 @@ sub process { if|for|while|switch|return|case| volatile|__volatile__| __attribute__|format|__extension__| - asm|__asm__)$/x) + asm|__asm__|scoped_guard)$/x) { # cpp #define statements have non-optional spaces, ie # if there is a space between the name and the open -- GitLab From d3a5f798bc866dae270c3f2a863dc3d75629ffd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 11:28:30 +0200 Subject: [PATCH 0266/2290] kbuild: Drop -Wdeclaration-after-statement commit b5ec6fd286dfa466f64cb0e56ed768092d0342ae upstream. With the advent on scope-based resource management it comes really tedious to abide by the contraints of -Wdeclaration-after-statement. It will still be recommeneded to place declarations at the start of a scope where possible, but it will no longer be enforced. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAHk-%3Dwi-RyoUhbChiVaJZoZXheAwnJ7OO%3DGxe85BkPAd93TwDA%40mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- Makefile | 6 +----- arch/arm64/kernel/vdso32/Makefile | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e93554269e474..1c23f38fc5d84 100644 --- a/Makefile +++ b/Makefile @@ -459,8 +459,7 @@ HOSTRUSTC = rustc HOSTPKG_CONFIG = pkg-config KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ - -O2 -fomit-frame-pointer -std=gnu11 \ - -Wdeclaration-after-statement + -O2 -fomit-frame-pointer -std=gnu11 KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) KBUILD_USERLDFLAGS := $(USERLDFLAGS) @@ -1018,9 +1017,6 @@ endif # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -# warn about C99 declaration after statement -KBUILD_CFLAGS += -Wdeclaration-after-statement - # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 36c8f66cad251..d513533cc922f 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -68,11 +68,9 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -Wdeclaration-after-statement \ -std=gnu11 VDSO_CFLAGS += -O2 # Some useful compiler-dependent flags from top-level Makefile -VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) VDSO_CFLAGS += -fno-strict-overflow VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) -- GitLab From 24ec7504a08a67247fbe798d1de995208a8c128a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Feb 2024 15:25:12 +0000 Subject: [PATCH 0267/2290] sched/membarrier: reduce the ability to hammer on sys_membarrier commit 944d5fe50f3f03daacfea16300e656a1691c4a23 upstream. On some systems, sys_membarrier can be very expensive, causing overall slowdowns for everything. So put a lock on the path in order to serialize the accesses to prevent the ability for this to be called at too high of a frequency and saturate the machine. Reviewed-and-tested-by: Mathieu Desnoyers Acked-by: Borislav Petkov Fixes: 22e4ebb97582 ("membarrier: Provide expedited private command") Fixes: c5f58bd58f43 ("membarrier: Provide GLOBAL_EXPEDITED command") Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sched/membarrier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 0c5be7ebb1dca..08b16d20c85bb 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -161,6 +161,9 @@ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) +static DEFINE_MUTEX(membarrier_ipi_mutex); +#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex) + static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ @@ -258,6 +261,7 @@ static int membarrier_global_expedited(void) if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { @@ -346,6 +350,7 @@ static int membarrier_private_expedited(int flags, int cpu_id) if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); if (cpu_id >= 0) { @@ -459,6 +464,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) * between threads which are users of @mm has its membarrier state * updated. */ + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { -- GitLab From 058d1c56167ec78daf420d6c43ddd7e2ba6b9bdf Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:02 -0800 Subject: [PATCH 0268/2290] of: property: Add in-ports/out-ports support to of_graph_get_port_parent() commit 8f1e0d791b5281f3a38620bc7c57763dc551be15 upstream. Similar to the existing "ports" node name, coresight device tree bindings have added "in-ports" and "out-ports" as standard node names for a collection of ports. Add support for these name to of_graph_get_port_parent() so that remote-endpoint parsing can find the correct parent node for these coresight ports too. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-4-saravanak@google.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 550efd1a58fc6..33d5f16c81204 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -762,7 +762,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node) /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); - if (depth == 2 && !of_node_name_eq(node, "ports")) + if (depth == 2 && !of_node_name_eq(node, "ports") && + !of_node_name_eq(node, "in-ports") && + !of_node_name_eq(node, "out-ports")) break; } return node; -- GitLab From 6589f0f72f8edd1fa11adce4eedbd3615f2e78ab Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 4 Feb 2024 01:16:45 +0900 Subject: [PATCH 0269/2290] nilfs2: fix potential bug in end_buffer_async_write commit 5bc09b397cbf1221f8a8aacb1152650c9195b02b upstream. According to a syzbot report, end_buffer_async_write(), which handles the completion of block device writes, may detect abnormal condition of the buffer async_write flag and cause a BUG_ON failure when using nilfs2. Nilfs2 itself does not use end_buffer_async_write(). But, the async_write flag is now used as a marker by commit 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks") as a means of resolving double list insertion of dirty blocks in nilfs_lookup_dirty_data_buffers() and nilfs_lookup_node_buffers() and the resulting crash. This modification is safe as long as it is used for file data and b-tree node blocks where the page caches are independent. However, it was irrelevant and redundant to also introduce async_write for segment summary and super root blocks that share buffers with the backing device. This led to the possibility that the BUG_ON check in end_buffer_async_write would fail as described above, if independent writebacks of the backing device occurred in parallel. The use of async_write for segment summary buffers has already been removed in a previous change. Fix this issue by removing the manipulation of the async_write flag for the remaining super root block buffer. Link: https://lkml.kernel.org/r/20240203161645.4992-1-konishi.ryusuke@gmail.com Fixes: 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+5c04210f7c7f897c1e7f@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/00000000000019a97c05fd42f8c8@google.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a4a147a983e0a..0a84613960dbf 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1702,7 +1702,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1713,6 +1712,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } break; } + set_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_begin_page_io(fs_page); fs_page = bh->b_page; @@ -1798,7 +1798,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { clear_buffer_uptodate(bh); if (bh->b_page != bd_page) { @@ -1807,6 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) } break; } + clear_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); fs_page = bh->b_page; @@ -1894,8 +1894,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Redirected)); - set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh == segbuf->sb_super_root) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1903,6 +1904,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) update_sr = true; break; } + set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, 0); fs_page = bh->b_page; -- GitLab From 13f79a002602e562ce425b403fa9797dcf1cfeab Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Jan 2023 01:41:14 +0900 Subject: [PATCH 0270/2290] nilfs2: replace WARN_ONs for invalid DAT metadata block requests commit 5124a0a549857c4b87173280e192eea24dea72ad upstream. If DAT metadata file block access fails due to corruption of the DAT file or abnormal virtual block numbers held by b-trees or inodes, a kernel warning is generated. This replaces the WARN_ONs by error output, so that a kernel, booted with panic_on_warn, does not panic. This patch also replaces the detected return code -ENOENT with another internal code -EINVAL to notify the bmap layer of metadata corruption. When the bmap layer sees -EINVAL, it handles the abnormal situation with nilfs_bmap_convert_error() and finally returns code -EIO as it should. Link: https://lkml.kernel.org/r/0000000000005cc3d205ea23ddcf@google.com Link: https://lkml.kernel.org/r/20230126164114.6911-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/dat.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9930fa901039f..1e7f653c1df7e 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat, int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, @@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, -- GitLab From c5d83ac2bf6ca668a39ffb1a576899a66153ba19 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 15:57:56 +0100 Subject: [PATCH 0271/2290] dm: limit the number of targets and parameter size area commit bd504bcfec41a503b32054da5472904b404341a4 upstream. The kvmalloc function fails with a warning if the size is larger than INT_MAX. The warning was triggered by a syscall testing robot. In order to avoid the warning, this commit limits the number of targets to 1048576 and the size of the parameter area to 1073741824. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-core.h | 2 ++ drivers/md/dm-ioctl.c | 3 ++- drivers/md/dm-table.c | 9 +++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 71dcd8fd4050a..6314210d36971 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -21,6 +21,8 @@ #include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_io; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 206e6ce554dc7..4376754816abe 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1877,7 +1877,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) { + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) { DMERR("Invalid data size in the ioctl structure: %u", param_kernel->data_size); return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index dac6a5f25f2be..e0367a672eabf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -128,7 +128,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, fmode_t mode, unsigned int num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -143,7 +148,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { -- GitLab From d028cc6d235fb0fe919bf20d785c4c7dd4eab7a9 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 14 Feb 2024 17:55:18 +0000 Subject: [PATCH 0272/2290] arm64: Subscribe Microsoft Azure Cobalt 100 to ARM Neoverse N2 errata commit fb091ff394792c018527b3211bbdfae93ea4ac02 upstream. Add the MIDR value of Microsoft Azure Cobalt 100, which is a Microsoft implemented CPU based on r0p0 of the ARM Neoverse N2 CPU, and therefore suffers from all the same errata. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Easwar Hariharan Reviewed-by: Anshuman Khandual Acked-by: Mark Rutland Acked-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240214175522.2457857-1-eahariha@linux.microsoft.com Signed-off-by: Will Deacon Signed-off-by: Easwar Hariharan Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/silicon-errata.rst | 7 +++++++ arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kernel/cpu_errata.c | 3 +++ 3 files changed, 14 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d9fce65b2f047..27135b9c07acb 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -221,3 +221,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7dce9c0aa7836..af3a678a76b3a 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -128,6 +129,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -179,6 +182,7 @@ #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61f22e9c92b4c..74584597bfb82 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -390,6 +390,7 @@ static const struct midr_range erratum_1463225[] = { static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2139208 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -403,6 +404,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { static const struct midr_range tsb_flush_fail_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2067961 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2054223 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -415,6 +417,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = { static struct midr_range trbe_write_out_of_range_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2253138 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), -- GitLab From 9020513afafe1b28ffc5d0dad6accb4ebcd0030c Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 29 Dec 2022 15:44:43 +0400 Subject: [PATCH 0273/2290] fs/ntfs3: Add null pointer checks commit fc4992458e0aa2d2e82a25c922e6ac36c2d91083 upstream. Added null pointer checks in function ntfs_security_init. Also added le32_to_cpu in functions ntfs_security_init and indx_read. Signed-off-by: Konstantin Komarov Cc: "Doebel, Bjoern" Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/fsntfs.c | 16 ++++++++++------ fs/ntfs3/index.c | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 873b1434a9989..4b72bc7f12ca3 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1842,10 +1842,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sdh->type != ATTR_ZERO || + if(!(root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sdh->type != ATTR_ZERO || root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || - offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sdh->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } @@ -1861,10 +1863,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sii->type != ATTR_ZERO || + if(!(root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sii->type != ATTR_ZERO || root_sii->rule != NTFS_COLLATION_TYPE_UINT || - offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sii->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index b89a33f5761ef..7371f7855e4c4 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1097,7 +1097,8 @@ ok: } /* check for index header length */ - if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) { + if (offsetof(struct INDEX_BUFFER, ihdr) + le32_to_cpu(ib->ihdr.used) > + bytes) { err = -EINVAL; goto out; } -- GitLab From 6fd24675188d354b1cad47462969afa2ab09d819 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Jan 2024 16:04:18 +0100 Subject: [PATCH 0274/2290] mlxsw: spectrum_acl_tcam: Fix stack corruption commit 483ae90d8f976f8339cf81066312e1329f2d3706 upstream. When tc filters are first added to a net device, the corresponding local port gets bound to an ACL group in the device. The group contains a list of ACLs. In turn, each ACL points to a different TCAM region where the filters are stored. During forwarding, the ACLs are sequentially evaluated until a match is found. One reason to place filters in different regions is when they are added with decreasing priorities and in an alternating order so that two consecutive filters can never fit in the same region because of their key usage. In Spectrum-2 and newer ASICs the firmware started to report that the maximum number of ACLs in a group is more than 16, but the layout of the register that configures ACL groups (PAGT) was not updated to account for that. It is therefore possible to hit stack corruption [1] in the rare case where more than 16 ACLs in a group are required. Fix by limiting the maximum ACL group size to the minimum between what the firmware reports and the maximum ACLs that fit in the PAGT register. Add a test case to make sure the machine does not crash when this condition is hit. [1] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: mlxsw_sp_acl_tcam_group_update+0x116/0x120 [...] dump_stack_lvl+0x36/0x50 panic+0x305/0x330 __stack_chk_fail+0x15/0x20 mlxsw_sp_acl_tcam_group_update+0x116/0x120 mlxsw_sp_acl_tcam_group_region_attach+0x69/0x110 mlxsw_sp_acl_tcam_vchunk_get+0x492/0xa20 mlxsw_sp_acl_tcam_ventry_add+0x25/0xe0 mlxsw_sp_acl_rule_add+0x47/0x240 mlxsw_sp_flower_replace+0x1a9/0x1d0 tc_setup_cb_add+0xdc/0x1c0 fl_hw_replace_filter+0x146/0x1f0 fl_change+0xc17/0x1360 tc_new_tfilter+0x472/0xb90 rtnetlink_rcv_msg+0x313/0x3b0 netlink_rcv_skb+0x58/0x100 netlink_unicast+0x244/0x390 netlink_sendmsg+0x1e4/0x440 ____sys_sendmsg+0x164/0x260 ___sys_sendmsg+0x9a/0xe0 __sys_sendmsg+0x7a/0xc0 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Reported-by: Orel Hagag Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Signed-off-by: Petr Machata Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/2d91c89afba59c22587b444994ae419dbea8d876.1705502064.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 2 + .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 56 ++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 3b9ba8fa247ab..dc2e204bcd727 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -65,6 +65,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); + tcam->max_group_size = min_t(unsigned int, tcam->max_group_size, + MLXSW_REG_PAGT_ACL_MAX_NUM); err = ops->init(mlxsw_sp, tcam->priv, tcam); if (err) diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 7bf56ea161e35..616d3581419ca 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test" + max_erp_entries_test max_group_size_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -1033,6 +1033,60 @@ max_erp_entries_test() "max chain $chain_failed, mask $mask_failed" } +max_group_size_test() +{ + # The number of ACLs in an ACL group is limited. Once the maximum + # number of ACLs has been reached, filters cannot be added. This test + # verifies that when this limit is reached, insertion fails without + # crashing. + + RET=0 + + local num_acls=32 + local max_size + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_acls; i++)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter add dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter add dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + + ret=$? + [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break + done + + # We expect to exceed the maximum number of ACLs in a group, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of ACLs in a group" + + for ((; i >= 1; i--)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter del dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter del dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + done + + log_test "max ACL group size test ($tcflags). max size $max_size" +} + setup_prepare() { h1=${NETIFS[p1]} -- GitLab From 81e1dc2f70014b9523dd02ca763788e4f81e5bac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Feb 2024 09:12:53 +0100 Subject: [PATCH 0275/2290] Linux 6.1.79 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240220204841.073267068@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Jon Hunter Tested-by: Salvatore Bonaccorso Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20240221130223.073542172@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: kernelci.org bot Tested-by: Yann Sionneau Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Mateusz Jończyk Tested-by: Kelsey Steele Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1c23f38fc5d84..d6bc9f597e8b8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 02149c7cd115d3c82d20f758be4c9013d1128928 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:11 -0500 Subject: [PATCH 0276/2290] net/sched: Retire CBQ qdisc commit 051d442098421c28c7951625652f61b1e15c4bd5 upstream. While this amazing qdisc has served us well over the years it has not been getting any tender love and care and has bitrotted over time. It has become mostly a shooting target for syzkaller lately. For this reason, we are retiring it. Goodbye CBQ - we loved you. Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/Kconfig | 17 - net/sched/Makefile | 1 - net/sched/sch_cbq.c | 1727 ----------------- .../tc-testing/tc-tests/qdiscs/cbq.json | 184 -- 4 files changed, 1929 deletions(-) delete mode 100644 net/sched/sch_cbq.c delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 24cf0bf7c80e5..bbd84d8bc64ae 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -45,23 +45,6 @@ if NET_SCHED comment "Queueing/Scheduling" -config NET_SCH_CBQ - tristate "Class Based Queueing (CBQ)" - help - Say Y here if you want to use the Class-Based Queueing (CBQ) packet - scheduling algorithm. This algorithm classifies the waiting packets - into a tree-like hierarchy of classes; the leaves of this tree are - in turn scheduled by separate algorithms. - - See the top of for more details. - - CBQ is a commonly used scheduler, so if you're unsure, you should - say Y here. Then say Y to all the queueing algorithms below that you - want to use as leaf disciplines. - - To compile this code as a module, choose M here: the - module will be called sch_cbq. - config NET_SCH_HTB tristate "Hierarchical Token Bucket (HTB)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 8a33a35fc50d5..0108d1bffa512 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o obj-$(CONFIG_NET_ACT_GATE) += act_gate.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o -obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c deleted file mode 100644 index 36db5f6782f2c..0000000000000 --- a/net/sched/sch_cbq.c +++ /dev/null @@ -1,1727 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/sch_cbq.c Class-Based Queueing discipline. - * - * Authors: Alexey Kuznetsov, - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Class-Based Queueing (CBQ) algorithm. - ======================================= - - Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource - Management Models for Packet Networks", - IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 - - [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 - - [3] Sally Floyd, "Notes on Class-Based Queueing: Setting - Parameters", 1996 - - [4] Sally Floyd and Michael Speer, "Experimental Results - for Class-Based Queueing", 1998, not published. - - ----------------------------------------------------------------------- - - Algorithm skeleton was taken from NS simulator cbq.cc. - If someone wants to check this code against the LBL version, - he should take into account that ONLY the skeleton was borrowed, - the implementation is different. Particularly: - - --- The WRR algorithm is different. Our version looks more - reasonable (I hope) and works when quanta are allowed to be - less than MTU, which is always the case when real time classes - have small rates. Note, that the statement of [3] is - incomplete, delay may actually be estimated even if class - per-round allotment is less than MTU. Namely, if per-round - allotment is W*r_i, and r_1+...+r_k = r < 1 - - delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B - - In the worst case we have IntServ estimate with D = W*r+k*MTU - and C = MTU*r. The proof (if correct at all) is trivial. - - - --- It seems that cbq-2.0 is not very accurate. At least, I cannot - interpret some places, which look like wrong translations - from NS. Anyone is advised to find these differences - and explain to me, why I am wrong 8). - - --- Linux has no EOI event, so that we cannot estimate true class - idle time. Workaround is to consider the next dequeue event - as sign that previous packet is finished. This is wrong because of - internal device queueing, but on a permanently loaded link it is true. - Moreover, combined with clock integrator, this scheme looks - very close to an ideal solution. */ - -struct cbq_sched_data; - - -struct cbq_class { - struct Qdisc_class_common common; - struct cbq_class *next_alive; /* next class with backlog in this priority band */ - -/* Parameters */ - unsigned char priority; /* class priority */ - unsigned char priority2; /* priority to be used after overlimit */ - unsigned char ewma_log; /* time constant for idle time calculation */ - - u32 defmap; - - /* Link-sharing scheduler parameters */ - long maxidle; /* Class parameters: see below. */ - long offtime; - long minidle; - u32 avpkt; - struct qdisc_rate_table *R_tab; - - /* General scheduler (WRR) parameters */ - long allot; - long quantum; /* Allotment per WRR round */ - long weight; /* Relative allotment: see below */ - - struct Qdisc *qdisc; /* Ptr to CBQ discipline */ - struct cbq_class *split; /* Ptr to split node */ - struct cbq_class *share; /* Ptr to LS parent in the class tree */ - struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ - struct cbq_class *borrow; /* NULL if class is bandwidth limited; - parent otherwise */ - struct cbq_class *sibling; /* Sibling chain */ - struct cbq_class *children; /* Pointer to children chain */ - - struct Qdisc *q; /* Elementary queueing discipline */ - - -/* Variables */ - unsigned char cpriority; /* Effective priority */ - unsigned char delayed; - unsigned char level; /* level of the class in hierarchy: - 0 for leaf classes, and maximal - level of children + 1 for nodes. - */ - - psched_time_t last; /* Last end of service */ - psched_time_t undertime; - long avgidle; - long deficit; /* Saved deficit for WRR */ - psched_time_t penalized; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tc_cbq_xstats xstats; - - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - - int filters; - - struct cbq_class *defaults[TC_PRIO_MAX + 1]; -}; - -struct cbq_sched_data { - struct Qdisc_class_hash clhash; /* Hash table of all classes */ - int nclasses[TC_CBQ_MAXPRIO + 1]; - unsigned int quanta[TC_CBQ_MAXPRIO + 1]; - - struct cbq_class link; - - unsigned int activemask; - struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes - with backlog */ - -#ifdef CONFIG_NET_CLS_ACT - struct cbq_class *rx_class; -#endif - struct cbq_class *tx_class; - struct cbq_class *tx_borrowed; - int tx_len; - psched_time_t now; /* Cached timestamp */ - unsigned int pmask; - - struct qdisc_watchdog watchdog; /* Watchdog timer, - started when CBQ has - backlog, but cannot - transmit just now */ - psched_tdiff_t wd_expires; - int toplevel; - u32 hgenerator; -}; - - -#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) - -static inline struct cbq_class * -cbq_class_lookup(struct cbq_sched_data *q, u32 classid) -{ - struct Qdisc_class_common *clc; - - clc = qdisc_class_find(&q->clhash, classid); - if (clc == NULL) - return NULL; - return container_of(clc, struct cbq_class, common); -} - -#ifdef CONFIG_NET_CLS_ACT - -static struct cbq_class * -cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) -{ - struct cbq_class *cl; - - for (cl = this->tparent; cl; cl = cl->tparent) { - struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; - - if (new != NULL && new != this) - return new; - } - return NULL; -} - -#endif - -/* Classify packet. The procedure is pretty complicated, but - * it allows us to combine link sharing and priority scheduling - * transparently. - * - * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - * so that it resolves to split nodes. Then packets are classified - * by logical priority, or a more specific classifier may be attached - * to the split node. - */ - -static struct cbq_class * -cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *head = &q->link; - struct cbq_class **defmap; - struct cbq_class *cl = NULL; - u32 prio = skb->priority; - struct tcf_proto *fl; - struct tcf_result res; - - /* - * Step 1. If skb->priority points to one of our classes, use it. - */ - if (TC_H_MAJ(prio ^ sch->handle) == 0 && - (cl = cbq_class_lookup(q, prio)) != NULL) - return cl; - - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - for (;;) { - int result = 0; - defmap = head->defaults; - - fl = rcu_dereference_bh(head->filter_list); - /* - * Step 2+n. Apply classifier. - */ - result = tcf_classify(skb, NULL, fl, &res, true); - if (!fl || result < 0) - goto fallback; - if (result == TC_ACT_SHOT) - return NULL; - - cl = (void *)res.class; - if (!cl) { - if (TC_H_MAJ(res.classid)) - cl = cbq_class_lookup(q, res.classid); - else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) - cl = defmap[TC_PRIO_BESTEFFORT]; - - if (cl == NULL) - goto fallback; - } - if (cl->level >= head->level) - goto fallback; -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - fallthrough; - case TC_ACT_RECLASSIFY: - return cbq_reclassify(skb, cl); - } -#endif - if (cl->level == 0) - return cl; - - /* - * Step 3+n. If classifier selected a link sharing class, - * apply agency specific classifier. - * Repeat this procedure until we hit a leaf node. - */ - head = cl; - } - -fallback: - cl = head; - - /* - * Step 4. No success... - */ - if (TC_H_MAJ(prio) == 0 && - !(cl = head->defaults[prio & TC_PRIO_MAX]) && - !(cl = head->defaults[TC_PRIO_BESTEFFORT])) - return head; - - return cl; -} - -/* - * A packet has just been enqueued on the empty class. - * cbq_activate_class adds it to the tail of active class list - * of its priority band. - */ - -static inline void cbq_activate_class(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - int prio = cl->cpriority; - struct cbq_class *cl_tail; - - cl_tail = q->active[prio]; - q->active[prio] = cl; - - if (cl_tail != NULL) { - cl->next_alive = cl_tail->next_alive; - cl_tail->next_alive = cl; - } else { - cl->next_alive = cl; - q->activemask |= (1<qdisc); - int prio = this->cpriority; - struct cbq_class *cl; - struct cbq_class *cl_prev = q->active[prio]; - - do { - cl = cl_prev->next_alive; - if (cl == this) { - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - if (cl == q->active[prio]) { - q->active[prio] = cl_prev; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - q->activemask &= ~(1<active[prio]); -} - -static void -cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) -{ - int toplevel = q->toplevel; - - if (toplevel > cl->level) { - psched_time_t now = psched_get_time(); - - do { - if (cl->undertime < now) { - q->toplevel = cl->level; - return; - } - } while ((cl = cl->borrow) != NULL && toplevel > cl->level); - } -} - -static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - int ret; - struct cbq_class *cl = cbq_classify(skb, sch, &ret); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; -#endif - if (cl == NULL) { - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; - } - - ret = qdisc_enqueue(skb, cl->q, to_free); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return ret; - } - - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; - } - return ret; -} - -/* Overlimit action: penalize leaf class by adding offtime */ -static void cbq_overlimit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = cl->undertime - q->now; - - if (!cl->delayed) { - delay += cl->offtime; - - /* - * Class goes to sleep, so that it will have no - * chance to work avgidle. Let's forgive it 8) - * - * BTW cbq-2.0 has a crap in this - * place, apparently they forgot to shift it by cl->ewma_log. - */ - if (cl->avgidle < 0) - delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); - if (cl->avgidle < cl->minidle) - cl->avgidle = cl->minidle; - if (delay <= 0) - delay = 1; - cl->undertime = q->now + delay; - - cl->xstats.overactions++; - cl->delayed = 1; - } - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; - - /* Dirty work! We must schedule wakeups based on - * real available rate, rather than leaf rate, - * which may be tiny (even zero). - */ - if (q->toplevel == TC_CBQ_MAXLEVEL) { - struct cbq_class *b; - psched_tdiff_t base_delay = q->wd_expires; - - for (b = cl->borrow; b; b = b->borrow) { - delay = b->undertime - q->now; - if (delay < base_delay) { - if (delay <= 0) - delay = 1; - base_delay = delay; - } - } - - q->wd_expires = base_delay; - } -} - -/* - * It is mission critical procedure. - * - * We "regenerate" toplevel cutoff, if transmitting class - * has backlog and it is not regulated. It is not part of - * original CBQ description, but looks more reasonable. - * Probably, it is wrong. This question needs further investigation. - */ - -static inline void -cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, - struct cbq_class *borrowed) -{ - if (cl && q->toplevel >= borrowed->level) { - if (cl->q->q.qlen > 1) { - do { - if (borrowed->undertime == PSCHED_PASTPERFECT) { - q->toplevel = borrowed->level; - return; - } - } while ((borrowed = borrowed->borrow) != NULL); - } -#if 0 - /* It is not necessary now. Uncommenting it - will save CPU cycles, but decrease fairness. - */ - q->toplevel = TC_CBQ_MAXLEVEL; -#endif - } -} - -static void -cbq_update(struct cbq_sched_data *q) -{ - struct cbq_class *this = q->tx_class; - struct cbq_class *cl = this; - int len = q->tx_len; - psched_time_t now; - - q->tx_class = NULL; - /* Time integrator. We calculate EOS time - * by adding expected packet transmission time. - */ - now = q->now + L2T(&q->link, len); - - for ( ; cl; cl = cl->share) { - long avgidle = cl->avgidle; - long idle; - - _bstats_update(&cl->bstats, len, 1); - - /* - * (now - last) is total time between packet right edges. - * (last_pktlen/rate) is "virtual" busy time, so that - * - * idle = (now - last) - last_pktlen/rate - */ - - idle = now - cl->last; - if ((unsigned long)idle > 128*1024*1024) { - avgidle = cl->maxidle; - } else { - idle -= L2T(cl, len); - - /* true_avgidle := (1-W)*true_avgidle + W*idle, - * where W=2^{-ewma_log}. But cl->avgidle is scaled: - * cl->avgidle == true_avgidle/W, - * hence: - */ - avgidle += idle - (avgidle>>cl->ewma_log); - } - - if (avgidle <= 0) { - /* Overlimit or at-limit */ - - if (avgidle < cl->minidle) - avgidle = cl->minidle; - - cl->avgidle = avgidle; - - /* Calculate expected time, when this class - * will be allowed to send. - * It will occur, when: - * (1-W)*true_avgidle + W*delay = 0, i.e. - * idle = (1/W - 1)*(-true_avgidle) - * or - * idle = (1 - W)*(-cl->avgidle); - */ - idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); - - /* - * That is not all. - * To maintain the rate allocated to the class, - * we add to undertime virtual clock, - * necessary to complete transmitted packet. - * (len/phys_bandwidth has been already passed - * to the moment of cbq_update) - */ - - idle -= L2T(&q->link, len); - idle += L2T(cl, len); - - cl->undertime = now + idle; - } else { - /* Underlimit */ - - cl->undertime = PSCHED_PASTPERFECT; - if (avgidle > cl->maxidle) - cl->avgidle = cl->maxidle; - else - cl->avgidle = avgidle; - } - if ((s64)(now - cl->last) > 0) - cl->last = now; - } - - cbq_update_toplevel(q, this, q->tx_borrowed); -} - -static inline struct cbq_class * -cbq_under_limit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *this_cl = cl; - - if (cl->tparent == NULL) - return cl; - - if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) { - cl->delayed = 0; - return cl; - } - - do { - /* It is very suspicious place. Now overlimit - * action is generated for not bounded classes - * only if link is completely congested. - * Though it is in agree with ancestor-only paradigm, - * it looks very stupid. Particularly, - * it means that this chunk of code will either - * never be called or result in strong amplification - * of burstiness. Dangerous, silly, and, however, - * no another solution exists. - */ - cl = cl->borrow; - if (!cl) { - this_cl->qstats.overlimits++; - cbq_overlimit(this_cl); - return NULL; - } - if (cl->level > q->toplevel) - return NULL; - } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime); - - cl->delayed = 0; - return cl; -} - -static inline struct sk_buff * -cbq_dequeue_prio(struct Qdisc *sch, int prio) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl_tail, *cl_prev, *cl; - struct sk_buff *skb; - int deficit; - - cl_tail = cl_prev = q->active[prio]; - cl = cl_prev->next_alive; - - do { - deficit = 0; - - /* Start round */ - do { - struct cbq_class *borrow = cl; - - if (cl->q->q.qlen && - (borrow = cbq_under_limit(cl)) == NULL) - goto skip_class; - - if (cl->deficit <= 0) { - /* Class exhausted its allotment per - * this round. Switch to the next one. - */ - deficit = 1; - cl->deficit += cl->quantum; - goto next_class; - } - - skb = cl->q->dequeue(cl->q); - - /* Class did not give us any skb :-( - * It could occur even if cl->q->q.qlen != 0 - * f.e. if cl->q == "tbf" - */ - if (skb == NULL) - goto skip_class; - - cl->deficit -= qdisc_pkt_len(skb); - q->tx_class = cl; - q->tx_borrowed = borrow; - if (borrow != cl) { -#ifndef CBQ_XSTATS_BORROWS_BYTES - borrow->xstats.borrows++; - cl->xstats.borrows++; -#else - borrow->xstats.borrows += qdisc_pkt_len(skb); - cl->xstats.borrows += qdisc_pkt_len(skb); -#endif - } - q->tx_len = qdisc_pkt_len(skb); - - if (cl->deficit <= 0) { - q->active[prio] = cl; - cl = cl->next_alive; - cl->deficit += cl->quantum; - } - return skb; - -skip_class: - if (cl->q->q.qlen == 0 || prio != cl->cpriority) { - /* Class is empty or penalized. - * Unlink it from active chain. - */ - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - /* Did cl_tail point to it? */ - if (cl == cl_tail) { - /* Repair it! */ - cl_tail = cl_prev; - - /* Was it the last class in this band? */ - if (cl == cl_tail) { - /* Kill the band! */ - q->active[prio] = NULL; - q->activemask &= ~(1<q->q.qlen) - cbq_activate_class(cl); - return NULL; - } - - q->active[prio] = cl_tail; - } - if (cl->q->q.qlen) - cbq_activate_class(cl); - - cl = cl_prev; - } - -next_class: - cl_prev = cl; - cl = cl->next_alive; - } while (cl_prev != cl_tail); - } while (deficit); - - q->active[prio] = cl_prev; - - return NULL; -} - -static inline struct sk_buff * -cbq_dequeue_1(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - unsigned int activemask; - - activemask = q->activemask & 0xFF; - while (activemask) { - int prio = ffz(~activemask); - activemask &= ~(1<tx_class) - cbq_update(q); - - q->now = now; - - for (;;) { - q->wd_expires = 0; - - skb = cbq_dequeue_1(sch); - if (skb) { - qdisc_bstats_update(sch, skb); - sch->q.qlen--; - return skb; - } - - /* All the classes are overlimit. - * - * It is possible, if: - * - * 1. Scheduler is empty. - * 2. Toplevel cutoff inhibited borrowing. - * 3. Root class is overlimit. - * - * Reset 2d and 3d conditions and retry. - * - * Note, that NS and cbq-2.0 are buggy, peeking - * an arbitrary class is appropriate for ancestor-only - * sharing, but not for toplevel algorithm. - * - * Our version is better, but slower, because it requires - * two passes, but it is unavoidable with top-level sharing. - */ - - if (q->toplevel == TC_CBQ_MAXLEVEL && - q->link.undertime == PSCHED_PASTPERFECT) - break; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->link.undertime = PSCHED_PASTPERFECT; - } - - /* No packets in scheduler or nobody wants to give them to us :-( - * Sigh... start watchdog timer in the last case. - */ - - if (sch->q.qlen) { - qdisc_qstats_overlimit(sch); - if (q->wd_expires) - qdisc_watchdog_schedule(&q->watchdog, - now + q->wd_expires); - } - return NULL; -} - -/* CBQ class maintenance routines */ - -static void cbq_adjust_levels(struct cbq_class *this) -{ - if (this == NULL) - return; - - do { - int level = 0; - struct cbq_class *cl; - - cl = this->children; - if (cl) { - do { - if (cl->level > level) - level = cl->level; - } while ((cl = cl->sibling) != this->children); - } - this->level = level + 1; - } while ((this = this->tparent) != NULL); -} - -static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) -{ - struct cbq_class *cl; - unsigned int h; - - if (q->quanta[prio] == 0) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - /* BUGGGG... Beware! This expression suffer of - * arithmetic overflows! - */ - if (cl->priority == prio) { - cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ - q->quanta[prio]; - } - if (cl->quantum <= 0 || - cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { - pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); - cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; - } - } - } -} - -static void cbq_sync_defmap(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *split = cl->split; - unsigned int h; - int i; - - if (split == NULL) - return; - - for (i = 0; i <= TC_PRIO_MAX; i++) { - if (split->defaults[i] == cl && !(cl->defmap & (1<defaults[i] = NULL; - } - - for (i = 0; i <= TC_PRIO_MAX; i++) { - int level = split->level; - - if (split->defaults[i]) - continue; - - for (h = 0; h < q->clhash.hashsize; h++) { - struct cbq_class *c; - - hlist_for_each_entry(c, &q->clhash.hash[h], - common.hnode) { - if (c->split == split && c->level < level && - c->defmap & (1<defaults[i] = c; - level = c->level; - } - } - } - } -} - -static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask) -{ - struct cbq_class *split = NULL; - - if (splitid == 0) { - split = cl->split; - if (!split) - return; - splitid = split->common.classid; - } - - if (split == NULL || split->common.classid != splitid) { - for (split = cl->tparent; split; split = split->tparent) - if (split->common.classid == splitid) - break; - } - - if (split == NULL) - return; - - if (cl->split != split) { - cl->defmap = 0; - cbq_sync_defmap(cl); - cl->split = split; - cl->defmap = def & mask; - } else - cl->defmap = (cl->defmap & ~mask) | (def & mask); - - cbq_sync_defmap(cl); -} - -static void cbq_unlink_class(struct cbq_class *this) -{ - struct cbq_class *cl, **clp; - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - - qdisc_class_hash_remove(&q->clhash, &this->common); - - if (this->tparent) { - clp = &this->sibling; - cl = *clp; - do { - if (cl == this) { - *clp = cl->sibling; - break; - } - clp = &cl->sibling; - } while ((cl = *clp) != this->sibling); - - if (this->tparent->children == this) { - this->tparent->children = this->sibling; - if (this->sibling == this) - this->tparent->children = NULL; - } - } else { - WARN_ON(this->sibling != this); - } -} - -static void cbq_link_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - struct cbq_class *parent = this->tparent; - - this->sibling = this; - qdisc_class_hash_insert(&q->clhash, &this->common); - - if (parent == NULL) - return; - - if (parent->children == NULL) { - parent->children = this; - } else { - this->sibling = parent->children->sibling; - parent->children->sibling = this; - } -} - -static void -cbq_reset(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int prio; - unsigned int h; - - q->activemask = 0; - q->pmask = 0; - q->tx_class = NULL; - q->tx_borrowed = NULL; - qdisc_watchdog_cancel(&q->watchdog); - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) - q->active[prio] = NULL; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - qdisc_reset(cl->q); - - cl->next_alive = NULL; - cl->undertime = PSCHED_PASTPERFECT; - cl->avgidle = cl->maxidle; - cl->deficit = cl->quantum; - cl->cpriority = cl->priority; - } - } -} - - -static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) -{ - if (lss->change & TCF_CBQ_LSS_FLAGS) { - cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; - cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; - } - if (lss->change & TCF_CBQ_LSS_EWMA) - cl->ewma_log = lss->ewma_log; - if (lss->change & TCF_CBQ_LSS_AVPKT) - cl->avpkt = lss->avpkt; - if (lss->change & TCF_CBQ_LSS_MINIDLE) - cl->minidle = -(long)lss->minidle; - if (lss->change & TCF_CBQ_LSS_MAXIDLE) { - cl->maxidle = lss->maxidle; - cl->avgidle = lss->maxidle; - } - if (lss->change & TCF_CBQ_LSS_OFFTIME) - cl->offtime = lss->offtime; -} - -static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]--; - q->quanta[cl->priority] -= cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]++; - q->quanta[cl->priority] += cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - - if (wrr->allot) - cl->allot = wrr->allot; - if (wrr->weight) - cl->weight = wrr->weight; - if (wrr->priority) { - cl->priority = wrr->priority - 1; - cl->cpriority = cl->priority; - if (cl->priority >= cl->priority2) - cl->priority2 = TC_CBQ_MAXPRIO - 1; - } - - cbq_addprio(q, cl); - return 0; -} - -static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) -{ - cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); - return 0; -} - -static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { - [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) }, - [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) }, - [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) }, - [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) }, - [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) }, - [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, - [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, -}; - -static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], - struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - int err; - - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, - cbq_policy, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_WRROPT]) { - const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); - - if (wrr->priority > TC_CBQ_MAXPRIO) { - NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); - err = -EINVAL; - } - } - return err; -} - -static int cbq_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct tc_ratespec *r; - int err; - - qdisc_watchdog_init(&q->watchdog, sch); - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) { - NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete"); - return -EINVAL; - } - - r = nla_data(tb[TCA_CBQ_RATE]); - - q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack); - if (!q->link.R_tab) - return -EINVAL; - - err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack); - if (err) - goto put_rtab; - - err = qdisc_class_hash_init(&q->clhash); - if (err < 0) - goto put_block; - - q->link.sibling = &q->link; - q->link.common.classid = sch->handle; - q->link.qdisc = sch; - q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (!q->link.q) - q->link.q = &noop_qdisc; - else - qdisc_hash_add(q->link.q, true); - - q->link.priority = TC_CBQ_MAXPRIO - 1; - q->link.priority2 = TC_CBQ_MAXPRIO - 1; - q->link.cpriority = TC_CBQ_MAXPRIO - 1; - q->link.allot = psched_mtu(qdisc_dev(sch)); - q->link.quantum = q->link.allot; - q->link.weight = q->link.R_tab->rate.rate; - - q->link.ewma_log = TC_CBQ_DEF_EWMA; - q->link.avpkt = q->link.allot/2; - q->link.minidle = -0x7FFFFFFF; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - cbq_link_class(&q->link); - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT])); - - cbq_addprio(q, &q->link); - return 0; - -put_block: - tcf_block_put(q->link.block); - -put_rtab: - qdisc_put_rtab(q->link.R_tab); - return err; -} - -static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - - if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_lssopt opt; - - opt.flags = 0; - if (cl->borrow == NULL) - opt.flags |= TCF_CBQ_LSS_BOUNDED; - if (cl->share == NULL) - opt.flags |= TCF_CBQ_LSS_ISOLATED; - opt.ewma_log = cl->ewma_log; - opt.level = cl->level; - opt.avpkt = cl->avpkt; - opt.maxidle = cl->maxidle; - opt.minidle = (u32)(-cl->minidle); - opt.offtime = cl->offtime; - opt.change = ~0; - if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_wrropt opt; - - memset(&opt, 0, sizeof(opt)); - opt.flags = 0; - opt.allot = cl->allot; - opt.priority = cl->priority + 1; - opt.cpriority = cl->cpriority + 1; - opt.weight = cl->weight; - if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_fopt opt; - - if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->common.classid : 0; - opt.defmap = cl->defmap; - opt.defchange = ~0; - if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt)) - goto nla_put_failure; - } - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) -{ - if (cbq_dump_lss(skb, cl) < 0 || - cbq_dump_rate(skb, cl) < 0 || - cbq_dump_wrr(skb, cl) < 0 || - cbq_dump_fopt(skb, cl) < 0) - return -1; - return 0; -} - -static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *nest; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, &q->link) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - q->link.xstats.avgidle = q->link.avgidle; - return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats)); -} - -static int -cbq_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - struct nlattr *nest; - - if (cl->tparent) - tcm->tcm_parent = cl->tparent->common.classid; - else - tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->common.classid; - tcm->tcm_info = cl->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, cl) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - __u32 qlen; - - cl->xstats.avgidle = cl->avgidle; - cl->xstats.undertime = 0; - qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); - - if (cl->undertime != PSCHED_PASTPERFECT) - cl->xstats.undertime = cl->undertime - q->now; - - if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; - - return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); -} - -static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old, struct netlink_ext_ack *extack) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack); - if (new == NULL) - return -ENOBUFS; - } - - *old = qdisc_replace(sch, new, &cl->q); - return 0; -} - -static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - return cl->q; -} - -static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cbq_deactivate_class(cl); -} - -static unsigned long cbq_find(struct Qdisc *sch, u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - return (unsigned long)cbq_class_lookup(q, classid); -} - -static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - WARN_ON(cl->filters); - - tcf_block_put(cl->block); - qdisc_put(cl->q); - qdisc_put_rtab(cl->R_tab); - gen_kill_estimator(&cl->rate_est); - if (cl != &q->link) - kfree(cl); -} - -static void cbq_destroy(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *next; - struct cbq_class *cl; - unsigned int h; - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = NULL; -#endif - /* - * Filters must be destroyed first because we don't destroy the - * classes from root to leafs which means that filters can still - * be bound to classes which have been destroyed already. --TGR '04 - */ - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - tcf_block_put(cl->block); - cl->block = NULL; - } - } - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], - common.hnode) - cbq_destroy_class(sch, cl); - } - qdisc_class_hash_destroy(&q->clhash); -} - -static int -cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg, struct netlink_ext_ack *extack) -{ - int err; - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)*arg; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct cbq_class *parent; - struct qdisc_rate_table *rtab = NULL; - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { - NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); - return -EOPNOTSUPP; - } - - if (cl) { - /* Check parent */ - if (parentid) { - if (cl->tparent && - cl->tparent->common.classid != parentid) { - NL_SET_ERR_MSG(extack, "Invalid parent id"); - return -EINVAL; - } - if (!cl->tparent && parentid != TC_H_ROOT) { - NL_SET_ERR_MSG(extack, "Parent must be root"); - return -EINVAL; - } - } - - if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), - tb[TCA_CBQ_RTAB], extack); - if (rtab == NULL) - return -EINVAL; - } - - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, - true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); - qdisc_put_rtab(rtab); - return err; - } - } - - /* Change class parameters */ - sch_tree_lock(sch); - - if (cl->next_alive != NULL) - cbq_deactivate_class(cl); - - if (rtab) { - qdisc_put_rtab(cl->R_tab); - cl->R_tab = rtab; - } - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - - if (tb[TCA_CBQ_WRROPT]) { - cbq_rmprio(q, cl); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - } - - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - - if (cl->q->q.qlen) - cbq_activate_class(cl); - - sch_tree_unlock(sch); - - return 0; - } - - if (parentid == TC_H_ROOT) - return -EINVAL; - - if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { - NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); - return -EINVAL; - } - - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], - extack); - if (rtab == NULL) - return -EINVAL; - - if (classid) { - err = -EINVAL; - if (TC_H_MAJ(classid ^ sch->handle) || - cbq_class_lookup(q, classid)) { - NL_SET_ERR_MSG(extack, "Specified class not found"); - goto failure; - } - } else { - int i; - classid = TC_H_MAKE(sch->handle, 0x8000); - - for (i = 0; i < 0x8000; i++) { - if (++q->hgenerator >= 0x8000) - q->hgenerator = 1; - if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) - break; - } - err = -ENOSR; - if (i >= 0x8000) { - NL_SET_ERR_MSG(extack, "Unable to generate classid"); - goto failure; - } - classid = classid|q->hgenerator; - } - - parent = &q->link; - if (parentid) { - parent = cbq_class_lookup(q, parentid); - err = -EINVAL; - if (!parent) { - NL_SET_ERR_MSG(extack, "Failed to find parentid"); - goto failure; - } - } - - err = -ENOBUFS; - cl = kzalloc(sizeof(*cl), GFP_KERNEL); - if (cl == NULL) - goto failure; - - gnet_stats_basic_sync_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); - goto failure; - } - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } - } - - cl->R_tab = rtab; - rtab = NULL; - cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - NULL); - if (!cl->q) - cl->q = &noop_qdisc; - else - qdisc_hash_add(cl->q, true); - - cl->common.classid = classid; - cl->tparent = parent; - cl->qdisc = sch; - cl->allot = parent->allot; - cl->quantum = cl->allot; - cl->weight = cl->R_tab->rate.rate; - - sch_tree_lock(sch); - cbq_link_class(cl); - cl->borrow = cl->tparent; - if (cl->tparent != &q->link) - cl->share = cl->tparent; - cbq_adjust_levels(parent); - cl->minidle = -0x7FFFFFFF; - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - if (cl->ewma_log == 0) - cl->ewma_log = q->link.ewma_log; - if (cl->maxidle == 0) - cl->maxidle = q->link.maxidle; - if (cl->avpkt == 0) - cl->avpkt = q->link.avpkt; - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); - - *arg = (unsigned long)cl; - return 0; - -failure: - qdisc_put_rtab(rtab); - return err; -} - -static int cbq_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl->filters || cl->children || cl == &q->link) - return -EBUSY; - - sch_tree_lock(sch); - - qdisc_purge_queue(cl->q); - - if (cl->next_alive) - cbq_deactivate_class(cl); - - if (q->tx_borrowed == cl) - q->tx_borrowed = q->tx_class; - if (q->tx_class == cl) { - q->tx_class = NULL; - q->tx_borrowed = NULL; - } -#ifdef CONFIG_NET_CLS_ACT - if (q->rx_class == cl) - q->rx_class = NULL; -#endif - - cbq_unlink_class(cl); - cbq_adjust_levels(cl->tparent); - cl->defmap = 0; - cbq_sync_defmap(cl); - - cbq_rmprio(q, cl); - sch_tree_unlock(sch); - - cbq_destroy_class(sch, cl); - return 0; -} - -static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl == NULL) - cl = &q->link; - - return cl->block; -} - -static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *p = (struct cbq_class *)parent; - struct cbq_class *cl = cbq_class_lookup(q, classid); - - if (cl) { - if (p && p->level <= cl->level) - return 0; - cl->filters++; - return (unsigned long)cl; - } - return 0; -} - -static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cl->filters--; -} - -static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - unsigned int h; - - if (arg->stop) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) - return; - } - } -} - -static const struct Qdisc_class_ops cbq_class_ops = { - .graft = cbq_graft, - .leaf = cbq_leaf, - .qlen_notify = cbq_qlen_notify, - .find = cbq_find, - .change = cbq_change_class, - .delete = cbq_delete, - .walk = cbq_walk, - .tcf_block = cbq_tcf_block, - .bind_tcf = cbq_bind_filter, - .unbind_tcf = cbq_unbind_filter, - .dump = cbq_dump_class, - .dump_stats = cbq_dump_class_stats, -}; - -static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &cbq_class_ops, - .id = "cbq", - .priv_size = sizeof(struct cbq_sched_data), - .enqueue = cbq_enqueue, - .dequeue = cbq_dequeue, - .peek = qdisc_peek_dequeued, - .init = cbq_init, - .reset = cbq_reset, - .destroy = cbq_destroy, - .change = NULL, - .dump = cbq_dump, - .dump_stats = cbq_dump_stats, - .owner = THIS_MODULE, -}; - -static int __init cbq_module_init(void) -{ - return register_qdisc(&cbq_qdisc_ops); -} -static void __exit cbq_module_exit(void) -{ - unregister_qdisc(&cbq_qdisc_ops); -} -module_init(cbq_module_init) -module_exit(cbq_module_exit) -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json deleted file mode 100644 index 1ab21c83a1223..0000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json +++ /dev/null @@ -1,184 +0,0 @@ -[ - { - "id": "3460", - "name": "Create CBQ with default setting", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "0592", - "name": "Create CBQ with mpu", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4684", - "name": "Create CBQ with valid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4345", - "name": "Create CBQ with invalid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4525", - "name": "Create CBQ with valid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6784", - "name": "Create CBQ with invalid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5468", - "name": "Delete CBQ with handle", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "492a", - "name": "Show CBQ class", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] -- GitLab From 09038f47e45cd5dbb02315db2134403a6b160ceb Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:12 -0500 Subject: [PATCH 0277/2290] net/sched: Retire ATM qdisc commit fb38306ceb9e770adfb5ffa6e3c64047b55f7a07 upstream. The ATM qdisc has served us well over the years but has not been getting much TLC due to lack of known users. Most recently it has become a shooting target for syzkaller. For this reason, we are retiring it. Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/Kconfig | 14 - net/sched/Makefile | 1 - net/sched/sch_atm.c | 706 ------------------ .../tc-testing/tc-tests/qdiscs/atm.json | 94 --- 4 files changed, 815 deletions(-) delete mode 100644 net/sched/sch_atm.c delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json diff --git a/net/sched/Kconfig b/net/sched/Kconfig index bbd84d8bc64ae..9bccbff826fb4 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -68,20 +68,6 @@ config NET_SCH_HFSC To compile this code as a module, choose M here: the module will be called sch_hfsc. -config NET_SCH_ATM - tristate "ATM Virtual Circuits (ATM)" - depends on ATM - help - Say Y here if you want to use the ATM pseudo-scheduler. This - provides a framework for invoking classifiers, which in turn - select classes of this queuing discipline. Each class maps - the flow(s) it is handling to a given virtual circuit. - - See the top of for more details. - - To compile this code as a module, choose M here: the - module will be called sch_atm. - config NET_SCH_PRIO tristate "Multi Band Priority Queueing (PRIO)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 0108d1bffa512..0c5762f5e07b4 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o -obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c deleted file mode 100644 index 4a981ca90b0bf..0000000000000 --- a/net/sched/sch_atm.c +++ /dev/null @@ -1,706 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for fput */ -#include -#include -#include - -/* - * The ATM queuing discipline provides a framework for invoking classifiers - * (aka "filters"), which in turn select classes of this queuing discipline. - * Each class maps the flow(s) it is handling to a given VC. Multiple classes - * may share the same VC. - * - * When creating a class, VCs are specified by passing the number of the open - * socket descriptor by which the calling process references the VC. The kernel - * keeps the VC open at least until all classes using it are removed. - * - * In this file, most functions are named atm_tc_* to avoid confusion with all - * the atm_* in net/atm. This naming convention differs from what's used in the - * rest of net/sched. - * - * Known bugs: - * - sometimes messes up the IP stack - * - any manipulations besides the few operations described in the README, are - * untested and likely to crash the system - * - should lock the flow while there is data in the queue (?) - */ - -#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - -struct atm_flow_data { - struct Qdisc_class_common common; - struct Qdisc *q; /* FIFO, TBF, etc. */ - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc, - struct sk_buff *skb); /* chaining */ - struct atm_qdisc_data *parent; /* parent qdisc */ - struct socket *sock; /* for closing */ - int ref; /* reference count */ - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct list_head list; - struct atm_flow_data *excess; /* flow for excess traffic; - NULL to set CLP instead */ - int hdr_len; - unsigned char hdr[]; /* header data; MUST BE LAST */ -}; - -struct atm_qdisc_data { - struct atm_flow_data link; /* unclassified skbs go here */ - struct list_head flows; /* NB: "link" is also on this - list */ - struct tasklet_struct task; /* dequeue tasklet */ -}; - -/* ------------------------- Class/flow operations ------------------------- */ - -static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - list_for_each_entry(flow, &p->flows, list) { - if (flow->common.classid == classid) - return flow; - } - return NULL; -} - -static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", - sch, p, flow, new, old); - if (list_empty(&flow->list)) - return -EINVAL; - if (!new) - new = &noop_qdisc; - *old = flow->q; - flow->q = new; - if (*old) - qdisc_reset(*old); - return 0; -} - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); - return flow ? flow->q : NULL; -} - -static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -/* - * atm_tc_put handles all destructions, including the ones that are explicitly - * requested (atm_tc_destroy, etc.). The assumption here is that we never drop - * anything that still seems to be in use. - */ -static void atm_tc_put(struct Qdisc *sch, unsigned long cl) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (--flow->ref) - return; - pr_debug("atm_tc_put: destroying\n"); - list_del_init(&flow->list); - pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_put(flow->q); - tcf_block_put(flow->block); - if (flow->sock) { - pr_debug("atm_tc_put: f_count %ld\n", - file_count(flow->sock->file)); - flow->vcc->pop = flow->old_pop; - sockfd_put(flow->sock); - } - if (flow->excess) - atm_tc_put(sch, (unsigned long)flow->excess); - if (flow != &p->link) - kfree(flow); - /* - * If flow == &p->link, the qdisc no longer works at this point and - * needs to be removed. (By the caller of atm_tc_put.) - */ -} - -static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - - pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); - VCC2FLOW(vcc)->old_pop(vcc, skb); - tasklet_schedule(&p->task); -} - -static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ - 0x00, 0x00, - 0x08, 0x00 -}; /* Ethertype IP (0800) */ - -static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = { - [TCA_ATM_FD] = { .type = NLA_U32 }, - [TCA_ATM_EXCESS] = { .type = NLA_U32 }, -}; - -static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)*arg; - struct atm_flow_data *excess = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_ATM_MAX + 1]; - struct socket *sock; - int fd, error, hdr_len; - void *hdr; - - pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); - /* - * The concept of parents doesn't apply for this qdisc. - */ - if (parent && parent != TC_H_ROOT && parent != sch->handle) - return -EINVAL; - /* - * ATM classes cannot be changed. In order to change properties of the - * ATM connection, that socket needs to be modified directly (via the - * native ATM API. In order to send a flow to a different VC, the old - * class needs to be removed and a new one added. (This may be changed - * later.) - */ - if (flow) - return -EBUSY; - if (opt == NULL) - return -EINVAL; - - error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, - NULL); - if (error < 0) - return error; - - if (!tb[TCA_ATM_FD]) - return -EINVAL; - fd = nla_get_u32(tb[TCA_ATM_FD]); - pr_debug("atm_tc_change: fd %d\n", fd); - if (tb[TCA_ATM_HDR]) { - hdr_len = nla_len(tb[TCA_ATM_HDR]); - hdr = nla_data(tb[TCA_ATM_HDR]); - } else { - hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ - } - if (!tb[TCA_ATM_EXCESS]) - excess = NULL; - else { - excess = (struct atm_flow_data *) - atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); - if (!excess) - return -ENOENT; - } - pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->nla_type, nla_len(opt), hdr_len); - sock = sockfd_lookup(fd, &error); - if (!sock) - return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); - if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { - error = -EPROTOTYPE; - goto err_out; - } - /* @@@ should check if the socket is really operational or we'll crash - on vcc->send */ - if (classid) { - if (TC_H_MAJ(classid ^ sch->handle)) { - pr_debug("atm_tc_change: classid mismatch\n"); - error = -EINVAL; - goto err_out; - } - } else { - int i; - unsigned long cl; - - for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_find(sch, classid); - if (!cl) - break; - } - } - pr_debug("atm_tc_change: new id %x\n", classid); - flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); - pr_debug("atm_tc_change: flow %p\n", flow); - if (!flow) { - error = -ENOBUFS; - goto err_out; - } - - error = tcf_block_get(&flow->block, &flow->filter_list, sch, - extack); - if (error) { - kfree(flow); - goto err_out; - } - - flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - extack); - if (!flow->q) - flow->q = &noop_qdisc; - pr_debug("atm_tc_change: qdisc %p\n", flow->q); - flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ - flow->vcc->user_back = flow; - pr_debug("atm_tc_change: vcc %p\n", flow->vcc); - flow->old_pop = flow->vcc->pop; - flow->parent = p; - flow->vcc->pop = sch_atm_pop; - flow->common.classid = classid; - flow->ref = 1; - flow->excess = excess; - list_add(&flow->list, &p->link.list); - flow->hdr_len = hdr_len; - if (hdr) - memcpy(flow->hdr, hdr, hdr_len); - else - memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); - *arg = (unsigned long)flow; - return 0; -err_out: - sockfd_put(sock); - return error; -} - -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (list_empty(&flow->list)) - return -EINVAL; - if (rcu_access_pointer(flow->filter_list) || flow == &p->link) - return -EBUSY; - /* - * Reference count must be 2: one for "keepalive" (set at class - * creation), and one for the reference held when calling delete. - */ - if (flow->ref < 2) { - pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); - return -EINVAL; - } - if (flow->ref > 2) - return -EBUSY; /* catch references via excess, etc. */ - atm_tc_put(sch, arg); - return 0; -} - -static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); - if (walker->stop) - return; - list_for_each_entry(flow, &p->flows, list) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) - break; - } -} - -static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? flow->block : p->link.block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - struct tcf_result res; - int result; - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - - pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - result = TC_ACT_OK; /* be nice to gcc */ - flow = NULL; - if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { - struct tcf_proto *fl; - - list_for_each_entry(flow, &p->flows, list) { - fl = rcu_dereference_bh(flow->filter_list); - if (fl) { - result = tcf_classify(skb, NULL, fl, &res, true); - if (result < 0) - continue; - if (result == TC_ACT_SHOT) - goto done; - - flow = (struct atm_flow_data *)res.class; - if (!flow) - flow = lookup_flow(sch, res.classid); - goto drop; - } - } - flow = NULL; -done: - ; - } - if (!flow) { - flow = &p->link; - } else { - if (flow->vcc) - ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - __qdisc_drop(skb, to_free); - goto drop; - case TC_ACT_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } -#endif - } - - ret = qdisc_enqueue(skb, flow->q, to_free); - if (ret != NET_XMIT_SUCCESS) { -drop: __maybe_unused - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - if (flow) - flow->qstats.drops++; - } - return ret; - } - /* - * Okay, this may seem weird. We pretend we've dropped the packet if - * it goes via ATM. The reason for this is that the outer qdisc - * expects to be able to q->dequeue the packet later on if we return - * success at this place. Also, sch->q.qdisc needs to reflect whether - * there is a packet egligible for dequeuing or not. Note that the - * statistics of the outer qdisc are necessarily wrong because of all - * this. There's currently no correct solution for this. - */ - if (flow == &p->link) { - sch->q.qlen++; - return NET_XMIT_SUCCESS; - } - tasklet_schedule(&p->task); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -/* - * Dequeue packets and send them over ATM. Note that we quite deliberately - * avoid checking net_device's flow control here, simply because sch_atm - * uses its own channels, which have nothing to do with any CLIP/LANE/or - * non-ATM interfaces. - */ - -static void sch_atm_dequeue(struct tasklet_struct *t) -{ - struct atm_qdisc_data *p = from_tasklet(p, t, task); - struct Qdisc *sch = qdisc_from_priv(p); - struct atm_flow_data *flow; - struct sk_buff *skb; - - pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - if (flow == &p->link) - continue; - /* - * If traffic is properly shaped, this won't generate nasty - * little bursts. Otherwise, it may ... (but that's okay) - */ - while ((skb = flow->q->ops->peek(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) - break; - - skb = qdisc_dequeue_peeked(flow->q); - if (unlikely(!skb)) - break; - - qdisc_bstats_update(sch, skb); - bstats_update(&flow->bstats, skb); - pr_debug("atm_tc_dequeue: sending on class %p\n", flow); - /* remove any LL header somebody else has attached */ - skb_pull(skb, skb_network_offset(skb)); - if (skb_headroom(skb) < flow->hdr_len) { - struct sk_buff *new; - - new = skb_realloc_headroom(skb, flow->hdr_len); - dev_kfree_skb(skb); - if (!new) - continue; - skb = new; - } - pr_debug("sch_atm_dequeue: ip %p, data %p\n", - skb_network_header(skb), skb->data); - ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb, flow->hdr_len), flow->hdr, - flow->hdr_len); - refcount_add(skb->truesize, - &sk_atm(flow->vcc)->sk_wmem_alloc); - /* atm.atm_options are already set by atm_tc_enqueue */ - flow->vcc->send(flow->vcc, skb); - } - } -} - -static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - - pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); - tasklet_schedule(&p->task); - skb = qdisc_dequeue_peeked(p->link.q); - if (skb) - sch->q.qlen--; - return skb; -} - -static struct sk_buff *atm_tc_peek(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - - pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); - - return p->link.q->ops->peek(p->link.q); -} - -static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_sync_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); - if (!p->link.q) - p->link.q = &noop_qdisc; - pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; - - err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, - extack); - if (err) - return err; - - tasklet_setup(&p->task, sch_atm_dequeue); - return 0; -} - -static void atm_tc_reset(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) - qdisc_reset(flow->q); -} - -static void atm_tc_destroy(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow, *tmp; - - pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - tcf_block_put(flow->block); - flow->block = NULL; - } - - list_for_each_entry_safe(flow, tmp, &p->flows, list) { - if (flow->ref > 1) - pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); - atm_tc_put(sch, (unsigned long)flow); - } - tasklet_kill(&p->task); -} - -static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct nlattr *nest; - - pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch, p, flow, skb, tcm); - if (list_empty(&flow->list)) - return -EINVAL; - tcm->tcm_handle = flow->common.classid; - tcm->tcm_info = flow->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr)) - goto nla_put_failure; - if (flow->vcc) { - struct sockaddr_atmpvc pvc; - int state; - - memset(&pvc, 0, sizeof(pvc)); - pvc.sap_family = AF_ATMPVC; - pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; - pvc.sap_addr.vpi = flow->vcc->vpi; - pvc.sap_addr.vci = flow->vcc->vci; - if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc)) - goto nla_put_failure; - state = ATM_VF2VS(flow->vcc->flags); - if (nla_put_u32(skb, TCA_ATM_STATE, state)) - goto nla_put_failure; - } - if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) - goto nla_put_failure; - } else { - if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) - goto nla_put_failure; - } - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} -static int -atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || - gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) - return -1; - - return 0; -} - -static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - return 0; -} - -static const struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .find = atm_tc_find, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_block = atm_tc_tcf_block, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, -}; - -static struct Qdisc_ops atm_qdisc_ops __read_mostly = { - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .peek = atm_tc_peek, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .dump = atm_tc_dump, - .owner = THIS_MODULE, -}; - -static int __init atm_init(void) -{ - return register_qdisc(&atm_qdisc_ops); -} - -static void __exit atm_exit(void) -{ - unregister_qdisc(&atm_qdisc_ops); -} - -module_init(atm_init) -module_exit(atm_exit) -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json deleted file mode 100644 index f5bc8670a67d1..0000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json +++ /dev/null @@ -1,94 +0,0 @@ -[ - { - "id": "7628", - "name": "Create ATM with default setting", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "390a", - "name": "Delete ATM with valid handle", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root atm" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "32a0", - "name": "Show ATM class", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class atm 1: parent 1:", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6310", - "name": "Dump ATM stats", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] -- GitLab From a41f6e170b852d2c68fc76012c49ec3332b0edcc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:13 -0500 Subject: [PATCH 0278/2290] net/sched: Retire dsmark qdisc commit bbe77c14ee6185a61ba6d5e435c1cbb489d2a9ed upstream. The dsmark qdisc has served us well over the years for diffserv but has not been getting much attention due to other more popular approaches to do diffserv services. Most recently it has become a shooting target for syzkaller. For this reason, we are retiring it. Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/Kconfig | 11 - net/sched/Makefile | 1 - net/sched/sch_dsmark.c | 518 ------------------ .../tc-testing/tc-tests/qdiscs/dsmark.json | 140 ----- 4 files changed, 670 deletions(-) delete mode 100644 net/sched/sch_dsmark.c delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9bccbff826fb4..9c4a80fce794f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -186,17 +186,6 @@ config NET_SCH_GRED To compile this code as a module, choose M here: the module will be called sch_gred. -config NET_SCH_DSMARK - tristate "Differentiated Services marker (DSMARK)" - help - Say Y if you want to schedule packets according to the - Differentiated Services architecture proposed in RFC 2475. - Technical information on this method, with pointers to associated - RFCs, is available at . - - To compile this code as a module, choose M here: the - module will be called sch_dsmark. - config NET_SCH_NETEM tristate "Network emulator (NETEM)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 0c5762f5e07b4..a66ac1e7b79b5 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -38,7 +38,6 @@ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o -obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c deleted file mode 100644 index 401ffaf87d622..0000000000000 --- a/net/sched/sch_dsmark.c +++ /dev/null @@ -1,518 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_dsmark.c - Differentiated Services field marker */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * classid class marking - * ------- ----- ------- - * n/a 0 n/a - * x:0 1 use entry [0] - * ... ... ... - * x:y y>0 y+1 use entry [y] - * ... ... ... - * x:indices-1 indices use entry [indices-1] - * ... ... ... - * x:y y+1 use entry [y & (indices-1)] - * ... ... ... - * 0xffff 0x10000 use entry [indices-1] - */ - - -#define NO_DEFAULT_INDEX (1 << 16) - -struct mask_value { - u8 mask; - u8 value; -}; - -struct dsmark_qdisc_data { - struct Qdisc *q; - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct mask_value *mv; - u16 indices; - u8 set_tc_index; - u32 default_index; /* index range is 0...0xffff */ -#define DSMARK_EMBEDDED_SZ 16 - struct mask_value embedded[DSMARK_EMBEDDED_SZ]; -}; - -static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) -{ - return index <= p->indices && index > 0; -} - -/* ------------------------- Class/flow operations ------------------------- */ - -static int dsmark_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", - __func__, sch, p, new, old); - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (new == NULL) - new = &noop_qdisc; - } - - *old = qdisc_replace(sch, new, &p->q); - return 0; -} - -static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - return p->q; -} - -static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) -{ - return TC_H_MIN(classid) + 1; -} - -static unsigned long dsmark_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - - return dsmark_find(sch, classid); -} - -static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) -{ -} - -static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { - [TCA_DSMARK_INDICES] = { .type = NLA_U16 }, - [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 }, - [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG }, - [TCA_DSMARK_MASK] = { .type = NLA_U8 }, - [TCA_DSMARK_VALUE] = { .type = NLA_U8 }, -}; - -static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - - pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", - __func__, sch, p, classid, parent, *arg); - - if (!dsmark_valid_index(p, *arg)) { - err = -ENOENT; - goto errout; - } - - if (!opt) - goto errout; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - if (tb[TCA_DSMARK_VALUE]) - p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); - - if (tb[TCA_DSMARK_MASK]) - p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - - err = 0; - -errout: - return err; -} - -static int dsmark_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - if (!dsmark_valid_index(p, arg)) - return -EINVAL; - - p->mv[arg - 1].mask = 0xff; - p->mv[arg - 1].value = 0; - - return 0; -} - -static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int i; - - pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", - __func__, sch, p, walker); - - if (walker->stop) - return; - - for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) { - walker->count++; - continue; - } - if (!tc_qdisc_stats_dump(sch, i + 1, walker)) - break; - } -} - -static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - return p->block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - unsigned int len = qdisc_pkt_len(skb); - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); - - if (p->set_tc_index) { - int wlen = skb_network_offset(skb); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) - & ~INET_ECN_MASK; - break; - - case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) - & ~INET_ECN_MASK; - break; - default: - skb->tc_index = 0; - break; - } - } - - if (TC_H_MAJ(skb->priority) == sch->handle) - skb->tc_index = TC_H_MIN(skb->priority); - else { - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, NULL, fl, &res, false); - - pr_debug("result %d class 0x%04x\n", result, res.classid); - - switch (result) { -#ifdef CONFIG_NET_CLS_ACT - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - - case TC_ACT_SHOT: - goto drop; -#endif - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; - } - } - - err = qdisc_enqueue(skb, p->q, to_free); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - qdisc_qstats_drop(sch); - return err; - } - - sch->qstats.backlog += len; - sch->q.qlen++; - - return NET_XMIT_SUCCESS; - -drop: - qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - u32 index; - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - skb = qdisc_dequeue_peeked(p->q); - if (skb == NULL) - return NULL; - - qdisc_bstats_update(sch, skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - - index = skb->tc_index & (p->indices - 1); - pr_debug("index %d->%d\n", skb->tc_index, index); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - default: - /* - * Only complain if a change was actually attempted. - * This way, we can send non-IP traffic through dsmark - * and don't need yet another qdisc as a bypass. - */ - if (p->mv[index].mask != 0xff || p->mv[index].value) - pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb_protocol(skb, true))); - break; - } - - return skb; -} - -static struct sk_buff *dsmark_peek(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - return p->q->ops->peek(p->q); -} - -static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - u32 default_index = NO_DEFAULT_INDEX; - u16 indices; - int i; - - pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); - - if (!opt) - goto errout; - - err = tcf_block_get(&p->block, &p->filter_list, sch, extack); - if (err) - return err; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - err = -EINVAL; - if (!tb[TCA_DSMARK_INDICES]) - goto errout; - indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); - - if (hweight32(indices) != 1) - goto errout; - - if (tb[TCA_DSMARK_DEFAULT_INDEX]) - default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - - if (indices <= DSMARK_EMBEDDED_SZ) - p->mv = p->embedded; - else - p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); - if (!p->mv) { - err = -ENOMEM; - goto errout; - } - for (i = 0; i < indices; i++) { - p->mv[i].mask = 0xff; - p->mv[i].value = 0; - } - p->indices = indices; - p->default_index = default_index; - p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - - p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, - NULL); - if (p->q == NULL) - p->q = &noop_qdisc; - else - qdisc_hash_add(p->q, true); - - pr_debug("%s: qdisc %p\n", __func__, p->q); - - err = 0; -errout: - return err; -} - -static void dsmark_reset(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - if (p->q) - qdisc_reset(p->q); -} - -static void dsmark_destroy(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - tcf_block_put(p->block); - qdisc_put(p->q); - if (p->mv != p->embedded) - kfree(p->mv); -} - -static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); - - if (!dsmark_valid_index(p, cl)) - return -EINVAL; - - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); - tcm->tcm_info = p->q->handle; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) - goto nla_put_failure; - - if (p->default_index != NO_DEFAULT_INDEX && - nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index)) - goto nla_put_failure; - - if (p->set_tc_index && - nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static const struct Qdisc_class_ops dsmark_class_ops = { - .graft = dsmark_graft, - .leaf = dsmark_leaf, - .find = dsmark_find, - .change = dsmark_change, - .delete = dsmark_delete, - .walk = dsmark_walk, - .tcf_block = dsmark_tcf_block, - .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_unbind_filter, - .dump = dsmark_dump_class, -}; - -static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &dsmark_class_ops, - .id = "dsmark", - .priv_size = sizeof(struct dsmark_qdisc_data), - .enqueue = dsmark_enqueue, - .dequeue = dsmark_dequeue, - .peek = dsmark_peek, - .init = dsmark_init, - .reset = dsmark_reset, - .destroy = dsmark_destroy, - .change = NULL, - .dump = dsmark_dump, - .owner = THIS_MODULE, -}; - -static int __init dsmark_module_init(void) -{ - return register_qdisc(&dsmark_qdisc_ops); -} - -static void __exit dsmark_module_exit(void) -{ - unregister_qdisc(&dsmark_qdisc_ops); -} - -module_init(dsmark_module_init) -module_exit(dsmark_module_exit) - -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json deleted file mode 100644 index c030795f9c37d..0000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json +++ /dev/null @@ -1,140 +0,0 @@ -[ - { - "id": "6345", - "name": "Create DSMARK with default setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "3462", - "name": "Create DSMARK with default_index setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "ca95", - "name": "Create DSMARK with set_tc_index flag", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "a950", - "name": "Create DSMARK with multiple setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4092", - "name": "Delete DSMARK with handle", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5930", - "name": "Show DSMARK class", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class dsmark 1:", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] -- GitLab From 89bebf2753115fed2626402d9049436a3e7ba616 Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Mon, 2 Oct 2023 13:55:51 +0200 Subject: [PATCH 0279/2290] sched/rt: Disallow writing invalid values to sched_rt_period_us commit 079be8fc630943d9fc70a97807feb73d169ee3fc upstream. The validation of the value written to sched_rt_period_us was broken because: - the sysclt_sched_rt_period is declared as unsigned int - parsed by proc_do_intvec() - the range is asserted after the value parsed by proc_do_intvec() Because of this negative values written to the file were written into a unsigned integer that were later on interpreted as large positive integers which did passed the check: if (sysclt_sched_rt_period <= 0) return EINVAL; This commit fixes the parsing by setting explicit range for both perid_us and runtime_us into the sched_rt_sysctls table and processes the values with proc_dointvec_minmax() instead. Alternatively if we wanted to use full range of unsigned int for the period value we would have to split the proc_handler and use proc_douintvec() for it however even the Documentation/scheduller/sched-rt-group.rst describes the range as 1 to INT_MAX. As far as I can tell the only problem this causes is that the sysctl file allows writing negative values which when read back may confuse userspace. There is also a LTP test being submitted for these sysctl files at: http://patchwork.ozlabs.org/project/ltp/patch/20230901144433.2526-1-chrubis@suse.cz/ Signed-off-by: Cyril Hrubis Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231002115553.3007-2-chrubis@suse.cz Cc: Mahmoud Adam Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 76bafa8d331a7..8d5c77569fbb0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -37,6 +37,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rt_runtime_us", @@ -44,6 +46,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_NEG_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rr_timeslice_ms", @@ -2970,9 +2974,6 @@ static int sched_rt_global_constraints(void) #ifdef CONFIG_SYSCTL static int sched_rt_global_validate(void) { - if (sysctl_sched_rt_period <= 0) - return -EINVAL; - if ((sysctl_sched_rt_runtime != RUNTIME_INF) && ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) || ((u64)sysctl_sched_rt_runtime * @@ -3003,7 +3004,7 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_validate(); -- GitLab From 5552b7bf26d9ad3e1ded3ffd0caeb63257a3168f Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Wed, 2 Aug 2023 17:19:06 +0200 Subject: [PATCH 0280/2290] sched/rt: sysctl_sched_rr_timeslice show default timeslice after reset commit c1fc6484e1fb7cc2481d169bfef129a1b0676abe upstream. The sched_rr_timeslice can be reset to default by writing value that is <= 0. However after reading from this file we always got the last value written, which is not useful at all. $ echo -1 > /proc/sys/kernel/sched_rr_timeslice_ms $ cat /proc/sys/kernel/sched_rr_timeslice_ms -1 Fix this by setting the variable that holds the sysctl file value to the jiffies_to_msecs(RR_TIMESLICE) in case that <= 0 value was written. Signed-off-by: Cyril Hrubis Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Petr Vorel Acked-by: Mel Gorman Tested-by: Petr Vorel Cc: Mahmoud Adam Link: https://lore.kernel.org/r/20230802151906.25258-3-chrubis@suse.cz Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8d5c77569fbb0..3a2335bc1d58b 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -3048,6 +3048,9 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, sched_rr_timeslice = sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE : msecs_to_jiffies(sysctl_sched_rr_timeslice); + + if (sysctl_sched_rr_timeslice <= 0) + sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE); } mutex_unlock(&mutex); -- GitLab From 6967ddd378e9d63f5c59dbace8b2cf963d6d6a0d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:40:37 +0300 Subject: [PATCH 0281/2290] PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5d1b4b46f856da1473c7ba9a5cdfcb55c9b2478 upstream. The "msg_addr" variable is u64. However, the "aligned_offset" is an unsigned int. This means that when the code does: msg_addr &= ~aligned_offset; it will unintentionally zero out the high 32 bits. Use ALIGN_DOWN() to do the alignment instead. Fixes: 2217fffcd63f ("PCI: dwc: endpoint: Fix dw_pcie_ep_raise_msix_irq() alignment support") Link: https://lore.kernel.org/r/af59c7ad-ab93-40f7-ad4a-7ac0b14d37f5@moroto.mountain Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam Cc: Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-designware-ep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 59c164b5c64aa..4086a7818981a 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -6,6 +6,7 @@ * Author: Kishon Vijay Abraham I */ +#include #include #include @@ -600,7 +601,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, } aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr &= ~aligned_offset; + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) -- GitLab From 0d27ac1779092446afb6e21666c70b7b8627d512 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 12 Jan 2024 19:37:29 +0100 Subject: [PATCH 0282/2290] riscv/efistub: Ensure GP-relative addressing is not used commit afb2a4fb84555ef9e61061f6ea63ed7087b295d5 upstream. The cflags for the RISC-V efistub were missing -mno-relax, thus were under the risk that the compiler could use GP-relative addressing. That happened for _edata with binutils-2.41 and kernel 6.1, causing the relocation to fail due to an invalid kernel_size in handle_kernel_image. It was not yet observed with newer versions, but that may just be luck. Cc: Signed-off-by: Jan Kiszka Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef5045a53ce09..b6e1dcb98a64c 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -25,7 +25,7 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ - -fpic + -fpic -mno-relax cflags-$(CONFIG_LOONGARCH) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fpie -- GitLab From c9ae228cfd1a4e292eb9680744a40a56299aa42f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 29 Oct 2023 18:07:04 +0100 Subject: [PATCH 0283/2290] dmaengine: apple-admac: Keep upper bits of REG_BUS_WIDTH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 306f5df81fcc89b462fbeb9dbe26d9a8ad7c7582 ] For RX channels, REG_BUS_WIDTH seems to default to a value of 0xf00, and macOS preserves the upper bits when setting the configuration in the lower ones. If we reset the upper bits to 0, this causes framing errors on suspend/resume (the data stream "tears" and channels get swapped around). Keeping the upper bits untouched, like the macOS driver does, fixes this issue. Signed-off-by: Hector Martin Reviewed-by: Martin Povišer Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20231029170704.82238-1-povik+lin@cutebit.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/apple-admac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 4cf8da77bdd91..cac4532fe23a9 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -56,6 +56,8 @@ #define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) +#define BUS_WIDTH_WORD_SIZE GENMASK(3, 0) +#define BUS_WIDTH_FRAME_SIZE GENMASK(7, 4) #define BUS_WIDTH_8BIT 0x00 #define BUS_WIDTH_16BIT 0x01 #define BUS_WIDTH_32BIT 0x02 @@ -739,7 +741,8 @@ static int admac_device_config(struct dma_chan *chan, struct admac_data *ad = adchan->host; bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; int wordsize = 0; - u32 bus_width = 0; + u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) & + ~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE); switch (is_tx ? config->dst_addr_width : config->src_addr_width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: -- GitLab From e717bd412001495f17400bfc09f606f1b594ef5a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Thu, 11 Jan 2024 15:59:41 +0300 Subject: [PATCH 0284/2290] scsi: target: core: Add TMF to tmr_list handling [ Upstream commit 83ab68168a3d990d5ff39ab030ad5754cbbccb25 ] An abort that is responded to by iSCSI itself is added to tmr_list but does not go to target core. A LUN_RESET that goes through tmr_list takes a refcounter on the abort and waits for completion. However, the abort will be never complete because it was not started in target core. Unable to locate ITT: 0x05000000 on CID: 0 Unable to locate RefTaskTag: 0x05000000 on CID: 0. wait_for_tasks: Stopping tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop wait for tasks: tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop ... INFO: task kworker/0:2:49 blocked for more than 491 seconds. task:kworker/0:2 state:D stack: 0 pid: 49 ppid: 2 flags:0x00000800 Workqueue: events target_tmr_work [target_core_mod] Call Trace: __switch_to+0x2c4/0x470 _schedule+0x314/0x1730 schedule+0x64/0x130 schedule_timeout+0x168/0x430 wait_for_completion+0x140/0x270 target_put_cmd_and_wait+0x64/0xb0 [target_core_mod] core_tmr_lun_reset+0x30/0xa0 [target_core_mod] target_tmr_work+0xc8/0x1b0 [target_core_mod] process_one_work+0x2d4/0x5d0 worker_thread+0x78/0x6c0 To fix this, only add abort to tmr_list if it will be handled by target core. Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20240111125941.8688-1-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 5 ----- drivers/target/target_core_transport.c | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 301fe376a1206..13558cbd9b82e 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0686882bcbda3..fb93d74c5d0b2 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3627,6 +3627,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true; -- GitLab From b2cb83539cdb54ed693e28e8569e54c49ffb6d56 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 17 Jan 2024 05:55:39 +0000 Subject: [PATCH 0285/2290] cifs: open_cached_dir should not rely on primary channel [ Upstream commit 936eba9cfb5cfbf6a2c762cd163605f2b784e03e ] open_cached_dir today selects ses->server a.k.a primary channel to send requests. When multichannel is used, the primary channel maybe down. So it does not make sense to rely only on that channel. This fix makes this function pick a channel with the standard helper function cifs_pick_channel. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cached_dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 6f4d7aa70e5a2..fd082151c5f9b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -149,7 +149,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -EOPNOTSUPP; ses = tcon->ses; - server = ses->server; + server = cifs_pick_channel(ses); cfids = tcon->cfids; if (!server->ops->new_lease_key) -- GitLab From 8dda42b1f2e40b7fdd6a039de8558448b9016748 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 0286/2290] dmaengine: shdma: increase size of 'dev_id' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 404290240827c3bb5c4e195174a8854eef2f89ac ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'dev_id' drivers/dma/sh/shdmac.c: In function ‘sh_dmae_probe’: drivers/dma/sh/shdmac.c:541:34: error: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size 9 [-Werror=format-truncation=] 541 | "sh-dmae%d.%d", pdev->id, id); | ^~ In function ‘sh_dmae_chan_probe’, inlined from ‘sh_dmae_probe’ at drivers/dma/sh/shdmac.c:845:9: drivers/dma/sh/shdmac.c:541:26: note: directive argument in the range [0, 2147483647] 541 | "sh-dmae%d.%d", pdev->id, id); | ^~~~~~~~~~~~~~ drivers/dma/sh/shdmac.c:541:26: note: directive argument in the range [0, 19] drivers/dma/sh/shdmac.c:540:17: note: ‘snprintf’ output between 11 and 21 bytes into a destination of size 16 540 | snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 541 | "sh-dmae%d.%d", pdev->id, id); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/sh/shdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 9c121a4b33ad8..f97d80343aea4 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -25,7 +25,7 @@ struct sh_dmae_chan { const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ int xmit_shift; /* log_2(bytes_per_xfer) */ void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[32]; /* unique name per DMAC of channel */ int pm_error; dma_addr_t slave_addr; }; -- GitLab From d3dbfb9d11fceb8bda2a4f0e5d44ae7363ca9bd1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 0287/2290] dmaengine: fsl-qdma: increase size of 'irq_name' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6386f6c995b3ab91c72cfb76e4465553c555a8da ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'irq_name' drivers/dma/fsl-qdma.c: In function ‘fsl_qdma_irq_init’: drivers/dma/fsl-qdma.c:824:46: error: ‘%d’ directive writing between 1 and 11 bytes into a region of size 10 [-Werror=format-overflow=] 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~ drivers/dma/fsl-qdma.c:824:35: note: directive argument in the range [-2147483641, 2147483646] 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~~~~~~~~~~~~~ drivers/dma/fsl-qdma.c:824:17: note: ‘sprintf’ output between 12 and 22 bytes into a destination of size 20 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/fsl-qdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 69385f32e2756..f383f219ed008 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -805,7 +805,7 @@ fsl_qdma_irq_init(struct platform_device *pdev, int i; int cpu; int ret; - char irq_name[20]; + char irq_name[32]; fsl_qdma->error_irq = platform_get_irq_byname(pdev, "qdma-error"); -- GitLab From e540c44983871b27db2a2d16bb9b25e5ed7cc748 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 16 Jan 2024 14:22:57 +0000 Subject: [PATCH 0288/2290] wifi: cfg80211: fix missing interfaces when dumping [ Upstream commit a6e4f85d3820d00694ed10f581f4c650445dbcda ] The nl80211_dump_interface() supports resumption in case nl80211_send_iface() doesn't have the resources to complete its work. The logic would store the progress as iteration offsets for rdev and wdev loops. However the logic did not properly handle resumption for non-last rdev. Assuming a system with 2 rdevs, with 2 wdevs each, this could happen: dump(cb=[0, 0]): if_start=cb[1] (=0) send rdev0.wdev0 -> ok send rdev0.wdev1 -> yield cb[1] = 1 dump(cb=[0, 1]): if_start=cb[1] (=1) send rdev0.wdev1 -> ok // since if_start=1 the rdev0.wdev0 got skipped // through if_idx < if_start send rdev1.wdev1 -> ok The if_start needs to be reset back to 0 upon wdev loop end. The problem is actually hard to hit on a desktop, and even on most routers. The prerequisites for this manifesting was: - more than 1 wiphy - a few handful of interfaces - dump without rdev or wdev filter I was seeing this with 4 wiphys 9 interfaces each. It'd miss 6 interfaces from the last wiphy reported to userspace. Signed-off-by: Michal Kazior Link: https://msgid.link/20240116142340.89678-1-kazikcz@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 70fb14b8bab07..c259d3227a9e2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3960,6 +3960,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; } + if_start = 0; wp_idx++; } out: -- GitLab From eb39bb548bf974acad7bd6780fe11f9e6652d696 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 4 Jan 2024 19:10:59 +0100 Subject: [PATCH 0289/2290] wifi: mac80211: fix race condition on enabling fast-xmit [ Upstream commit bcbc84af1183c8cf3d1ca9b78540c2185cd85e7f ] fast-xmit must only be enabled after the sta has been uploaded to the driver, otherwise it could end up passing the not-yet-uploaded sta via drv_tx calls to the driver, leading to potential crashes because of uninitialized drv_priv data. Add a missing sta->uploaded check and re-check fast xmit after inserting a sta. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240104181059.84032-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/sta_info.c | 2 ++ net/mac80211/tx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f3d6c3e4c970e..bd56015b29258 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -891,6 +891,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 322a035f75929..3d62e8b718740 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3044,7 +3044,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || -- GitLab From 070398d32c5f3ab0e890374904ad94551c76aec4 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 11:49:40 +0800 Subject: [PATCH 0290/2290] fbdev: savage: Error out if pixclock equals zero [ Upstream commit 04e5eac8f3ab2ff52fa191c187a46d4fdbc1e288 ] The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. Although pixclock is checked in savagefb_decode_var(), but it is not checked properly in savagefb_probe(). Fix this by checking whether pixclock is zero in the function savagefb_check_var() before info->var.pixclock is used as the divisor. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/savage/savagefb_driver.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index b7818b652698f..a7b63c475f954 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch (var->bits_per_pixel) { -- GitLab From f329523f6a65c3bbce913ad35473d83a319d5d99 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 14:24:43 +0800 Subject: [PATCH 0291/2290] fbdev: sis: Error out if pixclock equals zero [ Upstream commit e421946be7d9bf545147bea8419ef8239cb7ca52 ] The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. In sisfb_check_var(), var->pixclock is used as a divisor to caculate drate before it is checked against zero. Fix this by checking it at the beginning. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/sis/sis_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 1c197c3f95381..fe8996461b9ef 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1475,6 +1475,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { -- GitLab From f19361d570c67e7e014896fa2dacd7d721bf0aa8 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 23 Jan 2024 15:11:49 +0800 Subject: [PATCH 0292/2290] spi: hisi-sfc-v3xx: Return IRQ_NONE if no interrupts were detected [ Upstream commit de8b6e1c231a95abf95ad097b993d34b31458ec9 ] Return IRQ_NONE from the interrupt handler when no interrupt was detected. Because an empty interrupt will cause a null pointer error: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: complete+0x54/0x100 hisi_sfc_v3xx_isr+0x2c/0x40 [spi_hisi_sfc_v3xx] __handle_irq_event_percpu+0x64/0x1e0 handle_irq_event+0x7c/0x1cc Signed-off-by: Devyn Liu Link: https://msgid.link/r/20240123071149.917678-1-liudingyuan@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-hisi-sfc-v3xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index d3a23b1c2a4c5..61bf00dfe9c33 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); -- GitLab From 8fc80874103a5c20aebdc2401361aa01c817f75b Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:26:34 +0100 Subject: [PATCH 0293/2290] block: Fix WARNING in _copy_from_iter [ Upstream commit 13f3956eb5681a4045a8dfdef48df5dc4d9f58a6 ] Syzkaller reports a warning in _copy_from_iter because an iov_iter is supposedly used in the wrong direction. The reason is that syzcaller managed to generate a request with a transfer direction of SG_DXFER_TO_FROM_DEV. This instructs the kernel to copy user buffers into the kernel, read into the copied buffers and then copy the data back to user space. Thus the iovec is used in both directions. Detect this situation in the block layer and construct a new iterator with the correct direction for the copy-in. Reported-by: syzbot+a532b03fdfee2c137666@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000009b92c10604d7a5e9@google.com/t/ Reported-by: syzbot+63dec323ac56c28e644f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000003faaa105f6e7c658@google.com/T/ Signed-off-by: Christian A. Ehrhardt Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240121202634.275068-1-lk@c--e.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-map.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 66da9e2b19abf..b337ae347bfa3 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -203,12 +203,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); -- GitLab From bba595eb1422749ad9aea7e98991fa9db04bb26d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Jan 2024 15:47:34 -0800 Subject: [PATCH 0294/2290] smb: Work around Clang __bdos() type confusion [ Upstream commit 8deb05c84b63b4fdb8549e08942867a68924a5b8 ] Recent versions of Clang gets confused about the possible size of the "user" allocation, and CONFIG_FORTIFY_SOURCE ends up emitting a warning[1]: repro.c:126:4: warning: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 126 | __write_overflow_field(p_size_field, size); | ^ for this memset(): int len; __le16 *user; ... len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); ... if (len) { ... } else { memset(user, '\0', 2); } While Clang works on this bug[2], switch to using a direct assignment, which avoids memset() entirely which both simplifies the code and silences the false positive warning. (Making "len" size_t also silences the warning, but the direct assignment seems better.) Reported-by: Nathan Chancellor Closes: https://github.com/ClangBuiltLinux/linux/issues/1966 [1] Link: https://github.com/llvm/llvm-project/issues/77813 [2] Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsencrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index d0ac2648c0d61..d3d4cf6321fd5 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -444,7 +444,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, -- GitLab From a2aa77b5d8e3f521a893de21bda5d823649d1c09 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:45 +0000 Subject: [PATCH 0295/2290] cifs: translate network errors on send to -ECONNABORTED [ Upstream commit a68106a6928e0a6680f12bcc7338c0dddcfe4d11 ] When the network stack returns various errors, we today bubble up the error to the user (in case of soft mounts). This change translates all network errors except -EINTR and -EAGAIN to -ECONNABORTED. A similar approach is taken when we receive network errors when reading from the socket. The change also forces the cifsd thread to reconnect during it's next activity. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/transport.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8a1dd8407a3a7..97bf46de8e429 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -427,10 +427,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); -- GitLab From 41e137c2c75aed1b5f8595f2b2f8970539ba56bf Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Tue, 23 Jan 2024 19:30:02 +0100 Subject: [PATCH 0296/2290] ahci: asm1166: correct count of reported ports [ Upstream commit 0077a504e1a4468669fd2e011108db49133db56e ] The ASM1166 SATA host controller always reports wrongly, that it has 32 ports. But in reality, it only has six ports. This seems to be a hardware issue, as all tested ASM1166 SATA host controllers reports such high count of ports. Example output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0xffffff3f impl SATA mode. By adjusting the port_map, the count is limited to six ports. New output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0x3f impl SATA mode. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=211873 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218346 Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 805645efb3ccf..e22124f42183f 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -658,6 +658,11 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + } + if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; -- GitLab From 2d623c94fbba3554f4446ba6f3c764994e8b0d26 Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Wed, 24 Jan 2024 10:24:36 +0300 Subject: [PATCH 0297/2290] aoe: avoid potential deadlock at set_capacity [ Upstream commit e169bd4fb2b36c4b2bee63c35c740c85daeb2e86 ] Move set_capacity() outside of the section procected by (&d->lock). To avoid possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- [1] lock(&bdev->bd_size_lock); local_irq_disable(); [2] lock(&d->lock); [3] lock(&bdev->bd_size_lock); [4] lock(&d->lock); *** DEADLOCK *** Where [1](&bdev->bd_size_lock) hold by zram_add()->set_capacity(). [2]lock(&d->lock) hold by aoeblk_gdalloc(). And aoeblk_gdalloc() is trying to acquire [3](&bdev->bd_size_lock) at set_capacity() call. In this situation an attempt to acquire [4]lock(&d->lock) from aoecmd_cfg_rsp() will lead to deadlock. So the simplest solution is breaking lock dependency [2](&d->lock) -> [3](&bdev->bd_size_lock) by moving set_capacity() outside. Signed-off-by: Maksim Kiselev Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240124072436.3745720-2-bigunclemax@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/aoe/aoeblk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 128722cf6c3ca..827802e418dd3 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; -- GitLab From f48a6eb2e5e88c7548a8f20f6ad8131b06427655 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 25 Jan 2024 17:04:01 +0200 Subject: [PATCH 0298/2290] ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers [ Upstream commit 20730e9b277873deeb6637339edcba64468f3da3 ] With one of the on-board ASM1061 AHCI controllers (1b21:0612) on an ASUSTeK Pro WS WRX80E-SAGE SE WIFI mainboard, a controller hang was observed that was immediately preceded by the following kernel messages: ahci 0000:28:00.0: Using 64-bit DMA addresses ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00000 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00300 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00380 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00400 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00680 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00700 flags=0x0000] The first message is produced by code in drivers/iommu/dma-iommu.c which is accompanied by the following comment that seems to apply: /* * Try to use all the 32-bit PCI addresses first. The original SAC vs. * DAC reasoning loses relevance with PCIe, but enough hardware and * firmware bugs are still lurking out there that it's safest not to * venture into the 64-bit space until necessary. * * If your device goes wrong after seeing the notice then likely either * its driver is not setting DMA masks accurately, the hardware has * some inherent bug in handling >32-bit addresses, or not all the * expected address bits are wired up between the device and the IOMMU. */ Asking the ASM1061 on a discrete PCIe card to DMA from I/O virtual address 0xffffffff00000000 produces the following I/O page faults: vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000000 flags=0x0010] vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000500 flags=0x0010] Note that the upper 21 bits of the logged DMA address are zero. (When asking a different PCIe device in the same PCIe slot to DMA to the same I/O virtual address, we do see all the upper 32 bits of the DMA address as 1, so this is not an issue with the chipset or IOMMU configuration on the test system.) Also, hacking libahci to always set the upper 21 bits of all DMA addresses to 1 produces no discernible effect on the behavior of the ASM1061, and mkfs/mount/scrub/etc work as without this hack. This all strongly suggests that the ASM1061 has a 43 bit DMA address limit, and this commit therefore adds a quirk to deal with this limit. This issue probably applies to (some of) the other supported ASMedia parts as well, but we limit it to the PCI IDs known to refer to ASM1061 parts, as that's the only part we know for sure to be affected by this issue at this point. Link: https://lore.kernel.org/linux-ide/ZaZ2PIpEId-rl6jv@wantstofly.org/ Signed-off-by: Lennert Buytenhek [cassel: drop date from error messages in commit log] Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 29 +++++++++++++++++++++++------ drivers/ata/ahci.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e22124f42183f..42c6b660550c2 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -49,6 +49,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -129,6 +130,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -597,11 +605,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ + /* ASMedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ @@ -949,11 +957,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1926,7 +1943,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index ff8e6ae1c6362..f9c5906a8afa8 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ -- GitLab From 6c292c2f902760fcd82cbf8609ca766ea89fe34e Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:57 +0800 Subject: [PATCH 0299/2290] MIPS: reserve exception vector space ONLY ONCE [ Upstream commit abcabb9e30a1f9a69c76776f8abffc31c377b542 ] "cpu_probe" is called both by BP and APs, but reserving exception vector (like 0x0-0x1000) called by "cpu_probe" need once and calling on APs is too late since memblock is unavailable at that time. So, reserve exception vector ONLY by BP. Suggested-by: Thomas Bogendoerfer Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 246c6a6b02614..5b778995d4483 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) -- GitLab From a600d7f0c1d0872131186a42f30e77700fef1e14 Mon Sep 17 00:00:00 2001 From: Phoenix Chen Date: Fri, 26 Jan 2024 17:53:08 +0800 Subject: [PATCH 0300/2290] platform/x86: touchscreen_dmi: Add info for the TECLAST X16 Plus tablet [ Upstream commit 1abdf288b0ef5606f76b6e191fa6df05330e3d7e ] Add touch screen info for TECLAST X16 Plus tablet. Signed-off-by: Phoenix Chen Link: https://lore.kernel.org/r/20240126095308.5042-1-asbeltogf@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 9a92d515abb9b..50ec19188a20d 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -913,6 +913,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1567,6 +1593,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "E5A6_A1"), }, }, + { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, -- GitLab From 8b40eb2e716b503f7a4e1090815a17b1341b2150 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:37 +0800 Subject: [PATCH 0301/2290] ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt [ Upstream commit 993bf0f4c393b3667830918f9247438a8f6fdb5b ] Determine if bb_fragments is 0 instead of determining bb_free to eliminate the risk of dividing by zero when the block bitmap is corrupted. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-6-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1a310ee7d9e55..296185cbd1547 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,7 +831,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, -- GitLab From f97e75fa4e12b0aa0224e83fcbda8853ac2adf36 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:38 +0800 Subject: [PATCH 0302/2290] ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found() [ Upstream commit 4530b3660d396a646aad91a787b6ab37cf604b53 ] Determine if the group block bitmap is corrupted before using ac_b_ex in ext4_mb_try_best_found() to avoid allocating blocks from a group with a corrupted block bitmap in the following concurrency and making the situation worse. ext4_mb_regular_allocator ext4_lock_group(sb, group) ext4_mb_good_group // check if the group bbitmap is corrupted ext4_mb_complex_scan_group // Scan group gets ac_b_ex but doesn't use it ext4_unlock_group(sb, group) ext4_mark_group_bitmap_corrupted(group) // The block bitmap was corrupted during // the group unlock gap. ext4_mb_try_best_found ext4_lock_group(ac->ac_sb, group) ext4_mb_use_best_found mb_mark_used // Allocating blocks in block bitmap corrupted group Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 296185cbd1547..744472c0b6fa5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2176,6 +2176,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2183,6 +2186,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- GitLab From d639102f4cbd4cb65d1225dba3b9265596aab586 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:39 +0800 Subject: [PATCH 0303/2290] ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal() [ Upstream commit 832698373a25950942c04a512daa652c18a9b513 ] Places the logic for checking if the group's block bitmap is corrupt under the protection of the group lock to avoid allocating blocks from the group with a corrupted block bitmap. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-8-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 744472c0b6fa5..6a3e27771df73 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2215,12 +2215,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2253,6 +2251,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- GitLab From 3e746c4e4848914fa2d1ad5192cab5c589e6be13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 23 Dec 2023 15:16:50 +0100 Subject: [PATCH 0304/2290] Input: goodix - accept ACPI resources with gpio_count == 3 && gpio_int_idx == 0 [ Upstream commit 180a8f12c21f41740fee09ca7f7aa98ff5bb99f8 ] Some devices list 3 Gpio resources in the ACPI resource list for the touchscreen: 1. GpioInt resource pointing to the GPIO used for the interrupt 2. GpioIo resource pointing to the reset GPIO 3. GpioIo resource pointing to the GPIO used for the interrupt Note how the third extra GpioIo resource really is a duplicate of the GpioInt provided info. Ignore this extra GPIO, treating this setup the same as gpio_count == 2 && gpio_int_idx == 0 fixes the touchscreen not working on the Thunderbook Colossus W803 rugged tablet and likely also on the CyberBook_T116K. Reported-by: Maarten van der Schrieck Closes: https://gitlab.com/AdyaAdya/goodix-touchscreen-linux-driver/-/issues/22 Suggested-by: Maarten van der Schrieck Tested-by: Maarten van der Schrieck Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231223141650.10679-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 3f0732db7bf5b..6de64b3f900fb 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { -- GitLab From 9d508c897153ae8dd79303f7f035f078139f6b49 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:19:29 +0800 Subject: [PATCH 0305/2290] dmaengine: ti: edma: Add some null pointer checks to the edma_probe [ Upstream commit 6e2276203ac9ff10fc76917ec9813c660f627369 ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118031929.192192-1-chentao@kylinos.cn Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ti/edma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 7ec6e5d728b03..9212ac9f978f2 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2413,6 +2413,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2429,6 +2434,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { -- GitLab From 13c1af5f3bc4c1959f492fbb846aee30aa1fca28 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:26 +0100 Subject: [PATCH 0306/2290] regulator: pwm-regulator: Add validity checks in continuous .get_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c92688cac239794e4a1d976afa5203a4d3a2ac0e ] Continuous regulators can be configured to operate only in a certain duty cycle range (for example from 0..91%). Add a check to error out if the duty cycle translates to an unsupported (or out of range) voltage. Suggested-by: Uwe Kleine-König Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pwm-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index b9eeaff1c6615..925e486f73a6d 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. -- GitLab From fbd1cb2a9b77ec166eaf7931df150149116da7d1 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 26 Jan 2024 16:26:43 +0800 Subject: [PATCH 0307/2290] nvmet-tcp: fix nvme tcp ida memory leak [ Upstream commit 47c5dd66c1840524572dcdd956f4af2bdb6fbdff ] The nvmet_tcp_queue_ida should be destroy when the nvmet-tcp module exit. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ce42afe8f64ef..3480768274699 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1884,6 +1884,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); -- GitLab From 1675aae9e19e72c48e1c95338b894845944a855b Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:23 +0100 Subject: [PATCH 0308/2290] usb: ucsi_acpi: Quirk to ack a connector change ack cmd [ Upstream commit f3be347ea42dbb0358cd8b2d8dc543a23b70a976 ] The PPM on some Dell laptops seems to expect that the ACK_CC_CI command to clear the connector change notification is in turn followed by another ACK_CC_CI to acknowledge the ACK_CC_CI command itself. This is in violation of the UCSI spec that states: "The only notification that is not acknowledged by the OPM is the command completion notification for the ACK_CC_CI or the PPM_RESET command." Add a quirk to send this ack anyway. Apply the quirk to all Dell systems. On the first command that acks a connector change send a dummy command to determine if it runs into a timeout. Only activate the quirk if it does. This ensure that we do not break Dell systems that do not need the quirk. Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/ucsi/ucsi_acpi.c | 71 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 26171c5d3c61c..48130d636a020 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,8 @@ struct ucsi_acpi { unsigned long flags; guid_t guid; u64 cmd; + bool dell_quirk_probed; + bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -126,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops expect that an ACK command with the + * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) + * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. + * If this is not done events are not delivered to OSPM and + * subsequent commands will timeout. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val, ack = 0; + int ret; + + if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && + cmd & UCSI_ACK_CONNECTOR_CHANGE) + ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret != 0) + return ret; + if (ack == 0) + return ret; + + if (!ua->dell_quirk_probed) { + ua->dell_quirk_probed = true; + + cmd = UCSI_GET_CAPABILITY; + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, + sizeof(cmd)); + if (ret == 0) + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, + &ack, sizeof(ack)); + if (ret != -ETIMEDOUT) + return ret; + + ua->dell_quirk_active = true; + dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); + dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); + } + + if (!ua->dell_quirk_active) + return ret; + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -160,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -189,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) -- GitLab From c7bdaff0d07505e319cf89548bc115969e9fb816 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jan 2024 15:12:54 +0300 Subject: [PATCH 0309/2290] ALSA: usb-audio: Check presence of valid altsetting control [ Upstream commit 346f59d1e8ed0eed41c80e1acb657e484c308e6a ] Many devices with a single alternate setting do not have a Valid Alternate Setting Control and validation performed by validate_sample_rate_table_v2v3() doesn't work on them and is not really needed. So check the presense of control before sending altsetting validation requests. MOTU Microbook IIc is suffering the most without this check. It takes up to 40 seconds to bootup due to how slow it switches sampling rates: [ 2659.164824] usb 3-2: New USB device found, idVendor=07fd, idProduct=0004, bcdDevice= 0.60 [ 2659.164827] usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2659.164829] usb 3-2: Product: MicroBook IIc [ 2659.164830] usb 3-2: Manufacturer: MOTU [ 2659.166204] usb 3-2: Found last interface = 3 [ 2679.322298] usb 3-2: No valid sample rate available for 1:1, assuming a firmware bug [ 2679.322306] usb 3-2: 1:1: add audio endpoint 0x3 [ 2679.322321] usb 3-2: Creating new data endpoint #3 [ 2679.322552] usb 3-2: 1:1 Set sample rate 96000, clock 1 [ 2684.362250] usb 3-2: 2:1: cannot get freq (v2/v3): err -110 [ 2694.444700] usb 3-2: No valid sample rate available for 2:1, assuming a firmware bug [ 2694.444707] usb 3-2: 2:1: add audio endpoint 0x84 [ 2694.444721] usb 3-2: Creating new data endpoint #84 [ 2699.482103] usb 3-2: 2:1 Set sample rate 96000, clock 1 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240129121254.3454481-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/format.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f55b60..3b45d0ee76938 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; -- GitLab From 4dec3068eaa5b51693514d4f41ae3cc744baf4cf Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:43 +0800 Subject: [PATCH 0310/2290] ASoC: sunxi: sun4i-spdif: Add support for Allwinner H616 [ Upstream commit 0adf963b8463faa44653e22e56ce55f747e68868 ] The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Since the driver currently only supports the transmit function, support for the H616 is identical to what is currently done for the H6. Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-4-wens@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sunxi/sun4i-spdif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index bcceebca915ac..484b0e7c2defa 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -578,6 +578,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); -- GitLab From 949296ee62db11f8afb5ea414dd317a6b9a76712 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jan 2024 10:40:53 +0100 Subject: [PATCH 0311/2290] spi: sh-msiof: avoid integer overflow in constants [ Upstream commit 6500ad28fd5d67d5ca0fee9da73c463090842440 ] cppcheck rightfully warned: drivers/spi/spi-sh-msiof.c:792:28: warning: Signed integer overflow for expression '7<<29'. [integerOverflow] sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1); Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://msgid.link/r/20240130094053.10672-1-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-sh-msiof.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 51ceaa4857249..ec3a4939ee984 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -137,14 +137,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) -- GitLab From 1c57e5ef85c736637c36ffd44c39fa6e9c7ea4ba Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 30 Jan 2024 13:34:16 -0800 Subject: [PATCH 0312/2290] Input: xpad - add Lenovo Legion Go controllers [ Upstream commit 80441f76ee67002437db61f3b317ed80cce085d2 ] The Lenovo Legion Go is a handheld gaming system, similar to a Steam Deck. It has a gamepad (including rear paddles), 3 gyroscopes, a trackpad, volume buttons, a power button, and 2 LED ring lights. The Legion Go firmware presents these controls as a USB hub with various devices attached. In its default state, the gamepad is presented as an Xbox controller connected to this hub. (By holding a combination of buttons, it can be changed to use the older DirectInput API.) This patch teaches the existing Xbox controller module `xpad` to bind to the controller in the Legion Go, which enables support for the: - directional pad, - analog sticks (including clicks), - X, Y, A, B, - start and select (or menu and capture), - shoulder buttons, and - rumble. The trackpad, touchscreen, volume controls, and power button are already supported via existing kernel modules. Two of the face buttons, the gyroscopes, rear paddles, and LEDs are not. After this patch lands, the Legion Go will be mostly functional in Linux, out-of-the-box. The various components of the USB hub can be synthesized into a single logical controller (including the additional buttons) in userspace with [Handheld Daemon](https://github.com/hhd-dev/hhd), which makes the Go fully functional. Signed-off-by: Brenton Simpson Link: https://lore.kernel.org/r/20240118183546.418064-1-appsforartists@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e8011d70d0799..02f3bc4e4895e 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -489,6 +490,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */ -- GitLab From 17fe3616d854de8f4f0c29bcdb5de100a68bddca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Jan 2024 15:24:10 +0000 Subject: [PATCH 0313/2290] misc: open-dice: Fix spurious lockdep warning [ Upstream commit ac9762a74c7ca7cbfcb4c65f5871373653a046ac ] When probing the open-dice driver with PROVE_LOCKING=y, lockdep complains that the mutex in 'drvdata->lock' has a non-static key: | INFO: trying to register non-static key. | The code is fine but needs lockdep annotation, or maybe | you didn't initialize this object before use? | turning off the locking correctness validator. Fix the problem by initialising the mutex memory with mutex_init() instead of __MUTEX_INITIALIZER(). Cc: Arnd Bergmann Cc: David Brazdil Cc: Greg Kroah-Hartman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20240126152410.10148-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/open-dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index c61be3404c6f2..504b836a7abf8 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -142,7 +142,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -152,6 +151,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); -- GitLab From 41b256f473ac0cd5b6a4973f4cd75c6103661ab5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Jan 2024 17:29:46 -0500 Subject: [PATCH 0314/2290] netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new [ Upstream commit 6e348067ee4bc5905e35faa3a8fafa91c9124bc7 ] The annotation says in sctp_new(): "If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8)". However, it does not check SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the conntrack entry(ct). Because of that, if the ct in Router disappears for some reason in [1] with the packet sequence like below: Client > Server: sctp (1) [INIT] [init tag: 3201533963] Server > Client: sctp (1) [INIT ACK] [init tag: 972498433] Client > Server: sctp (1) [COOKIE ECHO] Server > Client: sctp (1) [COOKIE ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057809] Server > Client: sctp (1) [SACK] [cum ack 3075057809] Server > Client: sctp (1) [HB REQ] (the ct in Router disappears somehow) <-------- [1] Client > Server: sctp (1) [HB ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [ABORT] when processing HB ACK packet in Router it calls sctp_new() to initialize the new ct with vtag[REPLY] set to HB_ACK packet's vtag. Later when sending DATA from Client, all the SACKs from Server will get dropped in Router, as the SACK packet's vtag does not match vtag[REPLY] in the ct. The worst thing is the vtag in this ct will never get fixed by the upcoming packets from Server. This patch fixes it by checking SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the ct in sctp_new() as the annotation says. With this fix, it will leave vtag[REPLY] in ct to 0 in the case above, and the next HB REQ/ACK from Server is able to fix the vtag as its value is 0 in nf_conntrack_sctp_packet(). Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c94a9971d790c..7ffd698497f2a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -299,7 +299,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", -- GitLab From bead6ff98689091b89591a221035d1fe90cf9cdb Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Tue, 16 Jan 2024 11:00:00 -0500 Subject: [PATCH 0315/2290] drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz [ Upstream commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a ] [why] Originally, PMFW said min FCLK is 300Mhz, but min DCFCLK can be increased to 400Mhz because min FCLK is now 600Mhz so FCLK >= 1.5 * DCFCLK hardware requirement will still be satisfied. Increasing min DCFCLK addresses underflow issues (underflow occurs when phantom pipe is turned on for some Sub-Viewport configs). [how] Increasing DCFCLK by raising the min_dcfclk_mhz Reviewed-by: Chaitanya Dhere Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 85e0d1c2a9085..baecc0ffe7580 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2123,7 +2123,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; -- GitLab From 5653a6d65a11fd41afa79977aaab7f3aa7b6fbad Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:21 +0000 Subject: [PATCH 0316/2290] ASoC: wm_adsp: Don't overwrite fwf_name with the default [ Upstream commit daf3f0f99cde93a066240462b7a87cdfeedc04c0 ] There's no need to overwrite fwf_name with a kstrdup() of the cs_dsp part name. It is trivial to select either fwf_name or cs_dsp.part as the string to use when building the filename in wm_adsp_request_firmware_file(). This leaves fwf_name entirely owned by the codec driver. It also avoids problems with freeing the pointer. With the original code fwf_name was either a pointer owned by the codec driver, or a kstrdup() created by wm_adsp. This meant wm_adsp must free it if it set it, but not if the codec driver set it. The code was handling this by using devm_kstrdup(). But there is no absolute requirement that wm_adsp_common_init() must be called from probe(), so this was a pseudo-memory leak - each new call to wm_adsp_common_init() would allocate another block of memory but these would only be freed if the owning codec driver was removed. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240129162737.497-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 2cfca78f0401f..47a4c363227cc 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -740,19 +740,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -842,29 +848,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } -- GitLab From dee697ac5330e36f050b4ed0d6f520b8738fe12a Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 1 Feb 2024 14:53:08 +0300 Subject: [PATCH 0317/2290] ALSA: usb-audio: Ignore clock selector errors for single connection [ Upstream commit eaa1b01fe709d6a236a9cec74813e0400601fd23 ] For devices with multiple clock sources connected to a selector, we need to check what a clock selector control request has returned. This is needed to ensure that a requested clock source is indeed selected and for autoclock feature to work. For devices with single clock source connected, if we get an error there is nothing else we can do about it. We can't skip clock selector setup as it is required by some devices. So lets just ignore error in this case. This should fix various buggy Mackie devices: [ 649.109785] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.111946] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.113822] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) There is also interesting info from the Windows documentation [1] (this is probably why manufacturers dont't even test this feature): "The USB Audio 2.0 driver doesn't support clock selection. The driver uses the Clock Source Entity, which is selected by default and never issues a Clock Selector Control SET CUR request." Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/usb-2-0-audio-drivers [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=217314 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218175 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218342 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240201115308.17838-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/clock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e65566..a676ad093d189 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -328,8 +328,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) -- GitLab From 085195aa90a924c79e35569bcdad860d764a8e17 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:01 +0100 Subject: [PATCH 0318/2290] nvme-fc: do not wait in vain when unloading module [ Upstream commit 70fbfc47a392b98e5f8dba70c6efc6839205c982 ] The module exit path has race between deleting all controllers and freeing 'left over IDs'. To prevent double free a synchronization between nvme_delete_ctrl and ida_destroy has been added by the initial commit. There is some logic around trying to prevent from hanging forever in wait_for_completion, though it does not handling all cases. E.g. blktests is able to reproduce the situation where the module unload hangs forever. If we completely rely on the cleanup code executed from the nvme_delete_ctrl path, all IDs will be freed eventually. This makes calling ida_destroy unnecessary. We only have to ensure that all nvme_delete_ctrl code has been executed before we leave nvme_fc_exit_module. This is done by flushing the nvme_delete_wq workqueue. While at it, remove the unused nvme_fc_wq workqueue too. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 47 ++++++------------------------------------ 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 177a365b8ec55..3dbf926fd99fd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3869,10 +3862,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3890,7 +3879,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3914,8 +3903,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3935,45 +3922,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); -- GitLab From 16b2b31ba886694d9fc06950a1a230adc4357350 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:02 +0100 Subject: [PATCH 0319/2290] nvmet-fcloop: swap the list_add_tail arguments [ Upstream commit dcfad4ab4d6733f2861cd241d8532a0004fc835a ] The first argument of list_add_tail function is the new element which should be added to the list which is the second argument. Swap the arguments to allow processing more than one element at a time. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fcloop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c780af36c1d4a..f5b8442b653db 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; -- GitLab From 6319ab29d5e01c7a50e9a6503e0555b834905fc8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:03 +0100 Subject: [PATCH 0320/2290] nvmet-fc: release reference on target port [ Upstream commit c691e6d7e13dab81ac8c7489c83b5dea972522a5 ] In case we return early out of __nvmet_fc_finish_ls_req() we still have to release the reference on the target port. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1ab6601fdd5cf..0075d9636b065 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -359,7 +359,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_puttgtport; } list_del(&lsop->lsreq_list); @@ -372,6 +372,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); +out_puttgtport: nvmet_fc_tgtport_put(tgtport); } -- GitLab From b8338116689a5b541738a8bb89457e0c6c4d497e Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:04 +0100 Subject: [PATCH 0321/2290] nvmet-fc: defer cleanup using RCU properly [ Upstream commit 4049dc96b8de7aeb3addcea039446e464726a525 ] When the target executes a disconnect and the host triggers a reconnect immediately, the reconnect command still finds an existing association. The reconnect crashes later on because nvmet_fc_delete_target_assoc blindly removes resources while the reconnect code wants to use it. To address this, nvmet_fc_find_target_assoc should not be able to lookup an association which is being removed. The association list is already under RCU lifetime management, so let's properly use it and remove the association from the list and wait for a grace period before cleaning up all. This means we also can drop the RCU management on the queues, because this is now handled via the association itself. A second step split the execution context so that the initial disconnect command can complete without running the reconnect code in the same context. As usual, this is done by deferring the ->done to a workqueue. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 83 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 0075d9636b065..c9ef642313c8f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -165,7 +165,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -802,14 +802,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -831,15 +828,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -852,12 +847,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -969,7 +960,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1172,13 +1163,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1208,7 +1204,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1217,29 +1213,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); - for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } + synchronize_rcu(); - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + /* ensure all in-flight I/Os have been processed */ + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1492,9 +1480,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1547,9 +1534,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1581,7 +1567,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1593,9 +1579,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1625,6 +1610,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1870,9 +1857,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1887,8 +1871,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1904,6 +1886,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2902,6 +2887,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2933,6 +2921,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); -- GitLab From 67e2ddf2324ad55a555cdfb21802c8edce250372 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:06 +0100 Subject: [PATCH 0322/2290] nvmet-fc: hold reference on hostport match [ Upstream commit ca121a0f7515591dba0eb5532bfa7ace4dc153ce ] The hostport data structure is shared between the association, this why we keep track of the users via a refcount. So we should not decrement the refcount on a match and free the hostport several times. Reported by KASAN. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index c9ef642313c8f..64c26b703860c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1069,8 +1069,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; -- GitLab From 8b9e4539493b4e3f3a2de7ccdf9e75fc962622d9 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:09 +0100 Subject: [PATCH 0323/2290] nvmet-fc: abort command when there is no binding [ Upstream commit 3146345c2e9c2f661527054e402b0cfad80105a4 ] When the target port has not active port binding, there is no point in trying to process the command as it has to fail anyway. Instead adding checks to all commands abort the command early. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 64c26b703860c..b4b2631eb530e 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1101,6 +1101,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -2523,8 +2526,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); -- GitLab From 9e6987f8937a7bd7516aa52f25cb7e12c0c92ee8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:10 +0100 Subject: [PATCH 0324/2290] nvmet-fc: avoid deadlock on delete association path [ Upstream commit 710c69dbaccdac312e32931abcb8499c1525d397 ] When deleting an association the shutdown path is deadlocking because we try to flush the nvmet_wq nested. Avoid this by deadlock by deferring the put work into its own work item. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b4b2631eb530e..36cae038eb045 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -248,6 +250,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -359,7 +368,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - goto out_puttgtport; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -372,8 +381,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); -out_puttgtport: - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -1404,6 +1413,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { -- GitLab From f9eef0e495159b83e46bddd9e2409ad81b2f5f96 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:11 +0100 Subject: [PATCH 0325/2290] nvmet-fc: take ref count on tgtport before delete assoc [ Upstream commit fe506a74589326183297d5abdda02d0c76ae5a8b ] We have to ensure that the tgtport is not going away before be have remove all the associations. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 36cae038eb045..8a02ed63b1566 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1092,13 +1092,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1132,7 +1147,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1491,7 +1506,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); @@ -1545,7 +1560,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1590,7 +1605,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return; } @@ -1897,7 +1912,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return false; -- GitLab From be36276cb88b361c14472dbd74805b786cf1725c Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 29 Jan 2024 21:04:44 -0300 Subject: [PATCH 0326/2290] smb: client: increase number of PDUs allowed in a compound request [ Upstream commit 11d4d1dba3315f73d2d1d386f5bf4811a8241d45 ] With the introduction of SMB2_OP_QUERY_WSL_EA, the client may now send 5 commands in a single compound request in order to query xattrs from potential WSL reparse points, which should be fine as we currently allow up to 5 PDUs in a single compound request. However, if encryption is enabled (e.g. 'seal' mount option) or enforced by the server, current MAX_COMPOUND(5) won't be enough as we require an extra PDU for the transform header. Fix this by increasing MAX_COMPOUND to 7 and, while we're at it, add an WARN_ON_ONCE() and return -EIO instead of -ENOMEM in case we attempt to send a compound request that couldn't include the extra transform header. Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/transport.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7f1aea4c11b9c..58bb54994e22a 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -86,7 +86,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 97bf46de8e429..df44acaec9ae9 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -456,8 +456,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); -- GitLab From 200627f46e0335cbef76ef52ce5cf9f1e9f3b135 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:02 +0800 Subject: [PATCH 0327/2290] ext4: correct the hole length returned by ext4_map_blocks() [ Upstream commit 6430dea07e85958fa87d0276c0c4388dd51e630b ] In ext4_map_blocks(), if we can't find a range of mapping in the extents cache, we are calling ext4_ext_map_blocks() to search the real path and ext4_ext_determine_hole() to determine the hole range. But if the querying range was partially or completely overlaped by a delalloc extent, we can't find it in the real extent path, so the returned hole length could be incorrect. Fortunately, ext4_ext_put_gap_in_cache() have already handle delalloc extent, but it searches start from the expanded hole_start, doesn't start from the querying range, so the delalloc extent found could not be the one that overlaped the querying range, plus, it also didn't adjust the hole length. Let's just remove ext4_ext_put_gap_in_cache(), handle delalloc and insert adjusted hole extent in ext4_ext_determine_hole(). Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-4-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/extents.c | 111 +++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 41 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aa5aadd70bbc2..67af684e44e6e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, return len; } -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - /* * ext4_ext_rm_idx: * removes index from the index block. @@ -4064,6 +4040,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4181,22 +4220,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } -- GitLab From b3a996b106948ec2f347090b2550adfe0e6d3d4d Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Fri, 2 Feb 2024 10:28:59 -0800 Subject: [PATCH 0328/2290] Input: i8042 - add Fujitsu Lifebook U728 to i8042 quirk table [ Upstream commit 4255447ad34c5c3785fcdcf76cfa0271d6e5ed39 ] Another Fujitsu-related patch. In the initial boot stage the integrated keyboard of Fujitsu Lifebook U728 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:3092) not working at all. So this notebook uses a hid-over-i2c touchpad which is managed by the i2c_designware input driver. Since you can't find a PS/2 mouse port on this computer and you can't connect a PS/2 mouse to it even with an official port replicator I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20240103014717.127307-2-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index cd45a65e17f2c..dfc6c581873b7 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, + { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { -- GitLab From 9c66843606921d4f4a6f6c3275cded2963f36051 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:17:07 +0300 Subject: [PATCH 0329/2290] fs/ntfs3: Modified fix directory element type detection [ Upstream commit 22457c047ed971f2f2e33be593ddfabd9639a409 ] Unfortunately reparse attribute is used for many purposes (several dozens). It is not possible here to know is this name symlink or not. To get exactly the type of name we should to open inode (read mft). getattr for opened file (fstat) correctly returns symlink. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index d4d9f4ffb6d9a..c2fb76bb28f47 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + } return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } -- GitLab From 1970b5f2048f646d61327cd9685698b96ba31941 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:24:33 +0300 Subject: [PATCH 0330/2290] fs/ntfs3: Improve ntfs_dir_count [ Upstream commit 6a799c928b78b14999b7705c4cca0f88e297fe96 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index c2fb76bb28f47..72cdfa8727d3c 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -515,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; if (is_empty) *is_empty = true; @@ -563,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; } - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_used_bit(&ni->dir, ni, &bit); @@ -573,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out; - vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -584,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; } out: -- GitLab From 25d1694d6e34321d3dacccafe85f6b15719a5c62 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:26:31 +0300 Subject: [PATCH 0331/2290] fs/ntfs3: Correct hard links updating when dealing with DOS names [ Upstream commit 1918c10e137eae266b8eb0ab1cc14421dcb0e3e2 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/record.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ba336c7280b85..ab03c373cec66 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -491,8 +491,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false; if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } } used -= asize; -- GitLab From a9f7d7656fbcea654a3db7860033cf2667730c00 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:34:24 +0300 Subject: [PATCH 0332/2290] fs/ntfs3: Print warning while fixing hard links count [ Upstream commit 85ba2a75faee759809a7e43b4c103ac59bac1026 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index dc937089a464a..42dd9fdaf4151 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -402,7 +402,6 @@ end_enum: goto out; if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -417,6 +416,7 @@ end_enum: if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; } -- GitLab From 95bad562e575d6bbe623cfd234946573b0a48812 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:47:30 +0300 Subject: [PATCH 0333/2290] fs/ntfs3: Fix detected field-spanning write (size 8) of single field "le->name" [ Upstream commit d155617006ebc172a80d3eb013c4b867f9a8ada4 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 0f38d558169a1..8b580515b1d6e 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -517,7 +517,7 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. }; // sizeof(0x20) -- GitLab From 50545eb6cd5f7ff852a01fa29b7372524ef948cc Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 28 Nov 2023 11:09:34 +0300 Subject: [PATCH 0334/2290] fs/ntfs3: Add NULL ptr dereference checking at the end of attr_allocate_frame() [ Upstream commit aaab47f204aaf47838241d57bf8662c8840de60a ] It is preferable to exit through the out: label because internal debugging functions are located there. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrib.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 2215179c925b3..2618bf5a37892 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1658,8 +1658,10 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + } attr = attr_b; le = le_b; @@ -1740,13 +1742,15 @@ ins_ext: ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size; - valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } } return err; -- GitLab From ee12c3102027ba3c1ea08ca292dc9d7ccf7c21e4 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 21 Dec 2023 13:59:43 +0300 Subject: [PATCH 0335/2290] fs/ntfs3: Disable ATTR_LIST_ENTRY size check [ Upstream commit 4cdfb6e7bc9c80142d33bf1d4653a73fa678ba56 ] The use of sizeof(struct ATTR_LIST_ENTRY) has been replaced with le_size(0) due to alignment peculiarities on different platforms. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312071005.g6YrbaIe-lkp@intel.com/ Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrlist.c | 8 ++++---- fs/ntfs3/ntfs.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 0c6a68e71e7d4..723e49ec83ce7 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0); if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, /* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size); /* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 8b580515b1d6e..ba26a465b3091 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -521,8 +521,6 @@ struct ATTR_LIST_ENTRY { }; // sizeof(0x20) -static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + -- GitLab From 1c005ce9934c0b784b8123e2dee7aa2dbf7ebce2 Mon Sep 17 00:00:00 2001 From: Ism Hong Date: Tue, 26 Dec 2023 16:51:41 +0800 Subject: [PATCH 0336/2290] fs/ntfs3: use non-movable memory for ntfs3 MFT buffer cache [ Upstream commit d6d33f03baa43d763fe094ca926eeae7d3421d07 ] Since the buffer cache for ntfs3 metadata is not released until the file system is unmounted, allocating from the movable zone may result in cma allocation failures. This is due to the page still being used by ntfs3, leading to migration failures. To address this, this commit use sb_bread_umovable() instead of sb_bread(). This change prevents allocation from the movable zone, ensuring compatibility with scenarios where the buffer head is not released until unmount. This patch is inspired by commit a8ac900b8163("ext4: use non-movable memory for the ext4 superblock"). The issue is found when playing video files stored in NTFS on the Android TV platform. During this process, the media parser reads the video file, causing ntfs3 to allocate buffer cache from the CMA area. Subsequently, the hardware decoder attempts to allocate memory from the same CMA area. However, the page is still in use by ntfs3, resulting in a migrate failure in alloc_contig_range(). The pinned page and allocating stacktrace reported by page owner shows below: page:ffffffff00b68880 refcount:3 mapcount:0 mapping:ffffff80046aa828 index:0xc0040 pfn:0x20fa4 aops:def_blk_aops ino:0 flags: 0x2020(active|private) page dumped because: migration failure page last allocated via order 0, migratetype Movable, gfp_mask 0x108c48 (GFP_NOFS|__GFP_NOFAIL|__GFP_HARDWALL|__GFP_MOVABLE), page_owner tracks the page as allocated prep_new_page get_page_from_freelist __alloc_pages_nodemask pagecache_get_page __getblk_gfp __bread_gfp ntfs_read_run_nb ntfs_read_bh mi_read ntfs_iget5 dir_search_u ntfs_lookup __lookup_slow lookup_slow walk_component path_lookupat Signed-off-by: Ism Hong Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 74482ef569ab7..c9ba0d27601dc 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1015,7 +1015,7 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) static inline struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { - struct buffer_head *bh = sb_bread(sb, block); + struct buffer_head *bh = sb_bread_unmovable(sb, block); if (bh) return bh; -- GitLab From 0d2f804b9f5424d1970403d7bfcf1f22b7853e25 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:03:21 +0300 Subject: [PATCH 0337/2290] fs/ntfs3: Prevent generic message "attempt to access beyond end of device" [ Upstream commit 5ca87d01eba7bdfe9536a157ca33c1455bb8d16c ] It used in test environment. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fsntfs.c | 24 ++++++++++++++++++++++++ fs/ntfs3/ntfs_fs.h | 14 +------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 4b72bc7f12ca3..1eac80d55b554 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -976,6 +976,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); } +/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index c9ba0d27601dc..0f9bec29f2b70 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -580,6 +580,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized); /* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1012,19 +1013,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } -static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread_unmovable(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode); -- GitLab From 32a3974b26dfbb059d15ab8d5683d8b9de647468 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:13:59 +0300 Subject: [PATCH 0338/2290] fs/ntfs3: Correct function is_rst_area_valid [ Upstream commit 1b7dd28e14c4728ae1a815605ca33ffb4ce1b309 ] Reported-by: Robert Morris Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fslog.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 710cb5aa5a65b..d53ef128fa733 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size); @@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size); - file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; } + if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8)) -- GitLab From 5d67a4ff3dfe19aceb185d4c3912796e2d0ac4e1 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 29 Jan 2024 10:30:09 +0300 Subject: [PATCH 0339/2290] fs/ntfs3: Update inode->i_size after success write into compressed file [ Upstream commit d68968440b1a75dee05cfac7f368f1aa139e1911 ] Reported-by: Giovanni Santini Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index f31c0389a2e7d..14efe46df91ef 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1110,6 +1110,8 @@ out: iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos); return written; } -- GitLab From 6ed6cdbe88334ca3430c5aee7754dc4597498dfb Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Sat, 30 Dec 2023 17:00:03 +0800 Subject: [PATCH 0340/2290] fs/ntfs3: Fix oob in ntfs_listxattr [ Upstream commit 731ab1f9828800df871c5a7ab9ffe965317d3f15 ] The length of name cannot exceed the space occupied by ea. Reported-and-tested-by: syzbot+65e940cfb8f99a97aca7@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index df15e00c2a3a0..d98cf7b382bcc 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -217,6 +217,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break; + if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size) -- GitLab From 4e5bd2287021823c10862379312648dd55e17f49 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 15:53:55 +0100 Subject: [PATCH 0341/2290] wifi: mac80211: set station RX-NSS on reconfig [ Upstream commit dd6c064cfc3fc18d871107c6f5db8837e88572e4 ] When a station is added/reconfigured by userspace, e.g. a TDLS peer or a SoftAP client STA, rx_nss is currently not always set, so that it might be left zero. Set it up properly. Link: https://msgid.link/20240129155354.98f148a3d654.I193a02155f557ea54dc9d0232da66cf96734119a@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a2c4866080bd7..6cf0b77839d1d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1775,6 +1775,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } -- GitLab From 2bf17c3e13aab5e1e867508b2db85927810f9f97 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:48:23 +0100 Subject: [PATCH 0342/2290] wifi: mac80211: adding missing drv_mgd_complete_tx() call [ Upstream commit c042600c17d8c490279f0ae2baee29475fe8047d ] There's a call to drv_mgd_prepare_tx() and so there should be one to drv_mgd_complete_tx(), but on this path it's not. Add it. Link: https://msgid.link/20240131164824.2f0922a514e1.I5aac89b93bcead88c374187d70cad0599d29d2c8@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c6f0da028a2a4..f25dc6931a5b1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7294,6 +7294,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } -- GitLab From 700c3f642c32721f246e09d3a9511acf40ae42be Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:03 -0800 Subject: [PATCH 0343/2290] efi: runtime: Fix potential overflow of soft-reserved region size [ Upstream commit de1034b38a346ef6be25fe8792f5d1e0684d5ff4 ] md_size will have been narrowed if we have >= 4GB worth of pages in a soft-reserved region. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/riscv-runtime.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 7c48c380d722c..1995f0a2e0fc0 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index d0daacd2c903f..6b142aa35389e 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) -- GitLab From 48a33c312526857dc8850096cc31c53b2d6b7e27 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:04 -0800 Subject: [PATCH 0344/2290] efi: Don't add memblocks for soft-reserved memory [ Upstream commit 0bcff59ef7a652fcdc6d535554b63278c2406c8f ] Adding memblocks for soft-reserved regions prevents them from later being hotplugged in by dax_kmem. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi-init.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2fd770b499a35..ff9791ce2e156 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -116,15 +116,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use @@ -169,6 +160,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT; if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size); if (!is_usable_memory(md)) -- GitLab From 76ee44af09755afaa24964bb639730f66685657e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:36 +0800 Subject: [PATCH 0345/2290] hwmon: (coretemp) Enlarge per package core count limit [ Upstream commit 34cf8c657cf0365791cdc658ddbca9cc907726ce ] Currently, coretemp driver supports only 128 cores per package. This loses some core temperature information on systems that have more than 128 cores per package. [ 58.685033] coretemp coretemp.0: Adding Core 128 failed [ 58.692009] coretemp coretemp.0: Adding Core 129 failed ... Enlarge the limitation to 512 because there are platforms with more than 256 cores per package. Signed-off-by: Zhang Rui Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 59344ad62822d..c0aa6bfa66b24 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -40,7 +40,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) -- GitLab From 75745f2b7453f21df39d913ec1e6cfee7887c88d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 20 Dec 2023 17:26:58 +0100 Subject: [PATCH 0346/2290] scsi: lpfc: Use unsigned type for num_sge [ Upstream commit d6c1b19153f92e95e5e1801d540e98771053afae ] LUNs going into "failed ready running" state observed on >1T and on even numbers of size (2T, 4T, 6T, 8T and 10T). The issue occurs when DIF is enabled at the host. The kernel logs: Cannot setup S/G List for HBAIO segs 1/1 SGL 512 SCSI 256: 3 0 The host lpfc driver is failing to setup scatter/gather list (protection data) for the I/Os. The return type lpfc_bg_setup_sgl()/lpfc_bg_setup_sgl_prot() causes the compiler to remove the most significant bit. Use an unsigned type instead. Signed-off-by: Hannes Reinecke [dwagner: added commit message] Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20231220162658.12392-1-dwagner@suse.de Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 7aac9fc719675..0bb7e164b525f 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1919,7 +1919,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1927,8 +1927,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2100,7 +2100,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2124,8 +2124,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL; sgpe = scsi_prot_sglist(sc); -- GitLab From c7ac9c1f7f934df56fe1c1bb5ac970d543d4dc09 Mon Sep 17 00:00:00 2001 From: SEO HOYOUNG Date: Mon, 22 Jan 2024 17:33:24 +0900 Subject: [PATCH 0347/2290] scsi: ufs: core: Remove the ufshcd_release() in ufshcd_err_handling_prepare() [ Upstream commit 17e94b2585417e04dabc2f13bc03b4665ae687f3 ] If ufshcd_err_handler() is called in a suspend/resume situation, ufs_release() can be called twice and active_reqs end up going negative. This is because ufshcd_err_handling_prepare() and ufshcd_err_handling_unprepare() both call ufshcd_release(). Remove superfluous call to ufshcd_release(). Signed-off-by: SEO HOYOUNG Link: https://lore.kernel.org/r/20240122083324.11797-1-hy50.seo@samsung.com Reviewed-by: Bart Van Assche Reviewed-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 9fd4e9ed93b8b..f3c25467e571f 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6159,7 +6159,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_hold(hba, false); if (!ufshcd_is_clkgating_allowed(hba)) ufshcd_setup_clocks(hba, true); - ufshcd_release(hba); pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; ufshcd_vops_resume(hba, pm_op); } else { -- GitLab From c920f604e0c896aab911efb5913ce89f74b9b69c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0348/2290] LoongArch: Select ARCH_ENABLE_THP_MIGRATION instead of redefining it [ Upstream commit b3ff2d9c3a9c64cd0a011cdd407ffc38a6ea8788 ] ARCH_ENABLE_THP_MIGRATION is supposed to be selected by arch Kconfig. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e737dc8cd660c..b1b4396dbac6c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -9,6 +9,7 @@ config LOONGARCH select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL @@ -495,10 +496,6 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG -- GitLab From c0b07b4237e42cf952dc72d0f459e86fc3e2e121 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0349/2290] LoongArch: Select HAVE_ARCH_SECCOMP to use the common SECCOMP menu [ Upstream commit 6b79ecd084c99b31c8b4d0beda08893716d5558e ] LoongArch missed the refactoring made by commit 282a181b1a0d ("seccomp: Move config option SECCOMP to arch/Kconfig") because LoongArch was not mainlined at that time. The 'depends on PROC_FS' statement is stale as described in that commit. Select HAVE_ARCH_SECCOMP, and remove the duplicated config entry. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Kconfig | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b1b4396dbac6c..fa3171f563274 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -81,6 +81,7 @@ config LOONGARCH select GPIOLIB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -462,23 +463,6 @@ config PHYSICAL_START specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed kernel). -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu config ARCH_SELECT_MEMORY_MODEL -- GitLab From 6b82ffe7a265b832b40fd31ee5503170cb3407e2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 7 Feb 2024 08:01:17 +0900 Subject: [PATCH 0350/2290] firewire: core: send bus reset promptly on gap count error [ Upstream commit 7ed4380009e96d9e9c605e12822e987b35b05648 ] If we are bus manager and the bus has inconsistent gap counts, send a bus reset immediately instead of trying to read the root node's config ROM first. Otherwise, we could spend a lot of time trying to read the config ROM but never succeeding. This eliminates a 50+ second delay before the FireWire bus is usable after a newly connected device is powered on in certain circumstances. The delay occurs if a gap count inconsistency occurs, we are not the root node, and we become bus manager. One scenario that causes this is with a TI XIO2213B OHCI, the first time a Sony DSR-25 is powered on after being connected to the FireWire cable. In this configuration, the Linux box will not receive the initial PHY configuration packet sent by the DSR-25 as IRM, resulting in the DSR-25 having a gap count of 44 while the Linux box has a gap count of 63. FireWire devices have a gap count parameter, which is set to 63 on power-up and can be changed with a PHY configuration packet. This determines the duration of the subaction and arbitration gaps. For reliable communication, all nodes on a FireWire bus must have the same gap count. A node may have zero or more of the following roles: root node, bus manager (BM), isochronous resource manager (IRM), and cycle master. Unless a root node was forced with a PHY configuration packet, any node might become root node after a bus reset. Only the root node can become cycle master. If the root node is not cycle master capable, the BM or IRM should force a change of root node. After a bus reset, each node sends a self-ID packet, which contains its current gap count. A single bus reset does not change the gap count, but two bus resets in a row will set the gap count to 63. Because a consistent gap count is required for reliable communication, IEEE 1394a-2000 requires that the bus manager generate a bus reset if it detects that the gap count is inconsistent. When the gap count is inconsistent, build_tree() will notice this after the self identification process. It will set card->gap_count to the invalid value 0. If we become bus master, this will force bm_work() to send a bus reset when it performs gap count optimization. After a bus reset, there is no bus manager. We will almost always try to become bus manager. Once we become bus manager, we will first determine whether the root node is cycle master capable. Then, we will determine if the gap count should be changed. If either the root node or the gap count should be changed, we will generate a bus reset. To determine if the root node is cycle master capable, we read its configuration ROM. bm_work() will wait until we have finished trying to read the configuration ROM. However, an inconsistent gap count can make this take a long time. read_config_rom() will read the first few quadlets from the config ROM. Due to the gap count inconsistency, eventually one of the reads will time out. When read_config_rom() fails, fw_device_init() calls it again until MAX_RETRIES is reached. This takes 50+ seconds. Once we give up trying to read the configuration ROM, bm_work() will wake up, assume that the root node is not cycle master capable, and do a bus reset. Hopefully, this will resolve the gap count inconsistency. This change makes bm_work() check for an inconsistent gap count before waiting for the root node's configuration ROM. If the gap count is inconsistent, bm_work() will immediately do a bus reset. This eliminates the 50+ second delay and rapidly brings the bus to a working state. I considered that if the gap count is inconsistent, a PHY configuration packet might not be successful, so it could be desirable to skip the PHY configuration packet before the bus reset in this case. However, IEEE 1394a-2000 and IEEE 1394-2008 say that the bus manager may transmit a PHY configuration packet before a bus reset when correcting a gap count error. Since the standard endorses this, I decided it's safe to retain the PHY configuration packet transmission. Normally, after a topology change, we will reset the bus a maximum of 5 times to change the root node and perform gap count optimization. However, if there is a gap count inconsistency, we must always generate a bus reset. Otherwise the gap count inconsistency will persist and communication will be unreliable. For that reason, if there is a gap count inconstency, we generate a bus reset even if we already reached the 5 reset limit. Signed-off-by: Adam Goldman Reference: https://sourceforge.net/p/linux1394/mailman/message/58727806/ Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/core-card.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 6ac5ff20a2fe2..8aaa7fcb2630d 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation; - if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root. -- GitLab From d56edd0f1b3013e0a452c53cb476b6b7b39210a1 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 16 Jan 2024 19:10:45 +0800 Subject: [PATCH 0351/2290] drm/amdgpu: skip to program GFXDEC registers for suspend abort [ Upstream commit 93bafa32a6918154aa0caf9f66679a32c2431357 ] In the suspend abort cases, the gfx power rail doesn't turn off so some GFXDEC registers/CSB can't reset to default value and at this moment reinitialize GFXDEC/CSB will result in an unexpected error. So let skip those program sequence for the suspend abort case. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c46c6fbd235e8..e636c7850f777 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -999,6 +999,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b9983ca99eb7d..f24c3a20e901d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2414,6 +2414,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2428,6 +2429,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 84ca601f7d5f3..195b298923543 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3064,6 +3064,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); -- GitLab From 8b661fb17bfad9e2b72ed713c86a52829f9b0473 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 17 Jan 2024 13:39:37 +0800 Subject: [PATCH 0352/2290] drm/amdgpu: reset gpu for s3 suspend abort case [ Upstream commit 6ef82ac664bb9568ca3956e0d9c9c478e25077ff ] In the s3 suspend abort case some type of gfx9 power rail not turn off from FCH side and this will put the GPU in an unknown power status, so let's reset the gpu to a known good power state before reinitialize gpu device. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 811dd3ea63620..489c89465c78b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1285,10 +1285,32 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } -- GitLab From 0947d0d463d4e6fad75f3a3066613cb3d9689b26 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 2 Feb 2024 12:38:24 -0300 Subject: [PATCH 0353/2290] smb: client: set correct d_type for reparse points under DFS mounts [ Upstream commit 55c7788c37242702868bfac7861cdf0c358d6c3d ] Send query dir requests with an info level of SMB_FIND_FILE_FULL_DIRECTORY_INFO rather than SMB_FIND_FILE_DIRECTORY_INFO when the client is generating its own inode numbers (e.g. noserverino) so that reparse tags still can be parsed directly from the responses, but server won't send UniqueId (server inode number) Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/readdir.c | 15 ++++++++------- fs/smb/client/smb2pdu.c | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 2d75ba5aaa8ad..5990bdbae598f 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -304,14 +304,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, } static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, - SEARCH_ID_FULL_DIR_INFO *info, + const void *info, struct cifs_sb_info *cifs_sb) { + const FILE_FULL_DIRECTORY_INFO *di = info; + __dir_info_to_fattr(fattr, info); - /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */ + /* See MS-FSCC 2.4.14, 2.4.19 */ if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_cifstag = le32_to_cpu(info->EaSize); + fattr->cf_cifstag = le32_to_cpu(di->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -425,7 +427,7 @@ ffirst_retry: } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { - cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; @@ -1019,10 +1021,9 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: case SMB_FIND_FILE_ID_FULL_DIR_INFO: - cifs_fulldir_info_to_fattr(&fattr, - (SEARCH_ID_FULL_DIR_INFO *)find_entry, - cifs_sb); + cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb); break; default: cifs_dir_info_to_fattr(&fattr, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c1fc1651d8b69..4c1231496a725 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5010,6 +5010,9 @@ int SMB2_query_directory_init(const unsigned int xid, case SMB_FIND_FILE_POSIX_INFO: req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO; break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION; + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", info_level); @@ -5079,6 +5082,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, /* note that posix payload are variable size */ info_buf_size = sizeof(struct smb2_posix_info); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO); + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", srch_inf->info_level); -- GitLab From db48acce75d73dfe51c43d56893cce067b73cf08 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 29 Jan 2024 16:52:50 +0800 Subject: [PATCH 0354/2290] virtio-blk: Ensure no requests in virtqueues before deleting vqs. [ Upstream commit 4ce6e2db00de8103a0687fb0f65fd17124a51aaa ] Ensure no remaining requests in virtqueues before resetting vdev and deleting virtqueues. Otherwise these requests will never be completed. It may cause the system to become unresponsive. Function blk_mq_quiesce_queue() can ensure that requests have become in_flight status, but it cannot guarantee that requests have been processed by the device. Virtqueues should never be deleted before all requests become complete status. Function blk_mq_freeze_queue() ensure that all requests in virtqueues become complete status. And no requests can enter in virtqueues. Signed-off-by: Yi Sun Reviewed-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20240129085250.1550594-1-yi.sun@unisoc.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3124837aa406f..505026f0025c7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1206,14 +1206,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -1231,7 +1232,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif -- GitLab From 4dd73641d7ac7273751a893c7445f9d2dbcec2cb Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 Feb 2024 23:57:18 -0600 Subject: [PATCH 0355/2290] smb3: clarify mount warning [ Upstream commit a5cc98eba2592d6e3c5a4351319595ddde2a5901 ] When a user tries to use the "sec=krb5p" mount parameter to encrypt data on connection to a server (when authenticating with Kerberos), we indicate that it is not supported, but do not note the equivalent recommended mount parameter ("sec=krb5,seal") which turns on encryption for that mount (and uses Kerberos for auth). Update the warning message. Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index f4818599c00a2..4d5302b58b534 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -209,7 +209,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: - cifs_errorf(fc, "sec=krb5p is not supported!\n"); + cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n"); return 1; case Opt_sec_krb5i: ctx->sign = true; -- GitLab From 339ddc983bc1622341d95f244c361cda3da3a4ff Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 25 Dec 2023 15:36:15 +0200 Subject: [PATCH 0356/2290] pmdomain: mediatek: fix race conditions with genpd [ Upstream commit c41336f4d69057cbf88fed47951379b384540df5 ] If the power domains are registered first with genpd and *after that* the driver attempts to power them on in the probe sequence, then it is possible that a race condition occurs if genpd tries to power them on in the same time. The same is valid for powering them off before unregistering them from genpd. Attempt to fix race conditions by first removing the domains from genpd and *after that* powering down domains. Also first power up the domains and *after that* register them to genpd. Fixes: 59b644b01cf4 ("soc: mediatek: Add MediaTek SCPSYS power domains") Signed-off-by: Eugen Hristev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231225133615.78993-1-eugen.hristev@collabora.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/soc/mediatek/mtk-pm-domains.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c index 474b272f9b02d..832adb570b501 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c @@ -499,6 +499,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren goto err_put_node; } + /* recursive call to add all subdomains */ + ret = scpsys_add_subdomain(scpsys, child); + if (ret) + goto err_put_node; + ret = pm_genpd_add_subdomain(parent_pd, child_pd); if (ret) { dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n", @@ -508,11 +513,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name, child_pd->name); } - - /* recursive call to add all subdomains */ - ret = scpsys_add_subdomain(scpsys, child); - if (ret) - goto err_put_node; } return 0; @@ -526,9 +526,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) { int ret; - if (scpsys_domain_is_on(pd)) - scpsys_power_off(&pd->genpd); - /* * We're in the error cleanup already, so we only complain, * but won't emit another error on top of the original one. @@ -538,6 +535,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) dev_err(pd->scpsys->dev, "failed to remove domain '%s' : %d - state may be inconsistent\n", pd->genpd.name, ret); + if (scpsys_domain_is_on(pd)) + scpsys_power_off(&pd->genpd); clk_bulk_put(pd->num_clks, pd->clks); clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks); -- GitLab From 4aa4ea70f37c18d5c2db347858327efea20ade18 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Jan 2024 17:33:55 +0100 Subject: [PATCH 0357/2290] pmdomain: renesas: r8a77980-sysc: CR7 must be always on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f0e4a1356466ec1858ae8e5c70bea2ce5e55008b ] The power domain containing the Cortex-R7 CPU core on the R-Car V3H SoC must always be in power-on state, unlike on other SoCs in the R-Car Gen3 family. See Table 9.4 "Power domains" in the R-Car Series, 3rd Generation Hardware User’s Manual Rev.1.00 and later. Fix this by marking the domain as a CPU domain without control registers, so the driver will not touch it. Fixes: 41d6d8bd8ae9 ("soc: renesas: rcar-sysc: add R8A77980 support") Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/fdad9a86132d53ecddf72b734dac406915c4edc0.1705076735.git.geert+renesas@glider.be Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/soc/renesas/r8a77980-sysc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/renesas/r8a77980-sysc.c b/drivers/soc/renesas/r8a77980-sysc.c index 39ca84a67daad..621e411fc9991 100644 --- a/drivers/soc/renesas/r8a77980-sysc.c +++ b/drivers/soc/renesas/r8a77980-sysc.c @@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = { PD_CPU_NOCR }, { "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU, PD_CPU_NOCR }, - { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON }, + { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON, + PD_CPU_NOCR }, { "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON }, { "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR }, { "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR }, -- GitLab From 52dc9a7a573dbf778625a0efca0fca55489f084b Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Thu, 1 Feb 2024 09:10:08 +0100 Subject: [PATCH 0358/2290] IB/hfi1: Fix sdma.h tx->num_descs off-by-one error commit e6f57c6881916df39db7d95981a8ad2b9c3458d6 upstream. Unfortunately the commit `fd8958efe877` introduced another error causing the `descs` array to overflow. This reults in further crashes easily reproducible by `sendmsg` system call. [ 1080.836473] general protection fault, probably for non-canonical address 0x400300015528b00a: 0000 [#1] PREEMPT SMP PTI [ 1080.869326] RIP: 0010:hfi1_ipoib_build_ib_tx_headers.constprop.0+0xe1/0x2b0 [hfi1] -- [ 1080.974535] Call Trace: [ 1080.976990] [ 1081.021929] hfi1_ipoib_send_dma_common+0x7a/0x2e0 [hfi1] [ 1081.027364] hfi1_ipoib_send_dma_list+0x62/0x270 [hfi1] [ 1081.032633] hfi1_ipoib_send+0x112/0x300 [hfi1] [ 1081.042001] ipoib_start_xmit+0x2a9/0x2d0 [ib_ipoib] [ 1081.046978] dev_hard_start_xmit+0xc4/0x210 -- [ 1081.148347] __sys_sendmsg+0x59/0xa0 crash> ipoib_txreq 0xffff9cfeba229f00 struct ipoib_txreq { txreq = { list = { next = 0xffff9cfeba229f00, prev = 0xffff9cfeba229f00 }, descp = 0xffff9cfeba229f40, coalesce_buf = 0x0, wait = 0xffff9cfea4e69a48, complete = 0xffffffffc0fe0760 , packet_len = 0x46d, tlen = 0x0, num_desc = 0x0, desc_limit = 0x6, next_descq_idx = 0x45c, coalesce_idx = 0x0, flags = 0x0, descs = {{ qw = {0x8024000120dffb00, 0x4} # SDMA_DESC0_FIRST_DESC_FLAG (bit 63) }, { qw = { 0x3800014231b108, 0x4} }, { qw = { 0x310000e4ee0fcf0, 0x8} }, { qw = { 0x3000012e9f8000, 0x8} }, { qw = { 0x59000dfb9d0000, 0x8} }, { qw = { 0x78000e02e40000, 0x8} }} }, sdma_hdr = 0x400300015528b000, <<< invalid pointer in the tx request structure sdma_status = 0x0, SDMA_DESC0_LAST_DESC_FLAG (bit 62) complete = 0x0, priv = 0x0, txq = 0xffff9cfea4e69880, skb = 0xffff9d099809f400 } If an SDMA send consists of exactly 6 descriptors and requires dword padding (in the 7th descriptor), the sdma_txreq descriptor array is not properly expanded and the packet will overflow into the container structure. This results in a panic when the send completion runs. The exact panic varies depending on what elements of the container structure get corrupted. The fix is to use the correct expression in _pad_sdma_tx_descs() to test the need to expand the descriptor array. With this patch the crashes are no longer reproducible and the machine is stable. Fixes: fd8958efe877 ("IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors") Cc: stable@vger.kernel.org Reported-by: Mats Kronberg Tested-by: Mats Kronberg Signed-off-by: Daniel Vacek Link: https://lore.kernel.org/r/20240201081009.1109442-1-neelx@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 26c62162759ba..969c5c3ab859e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); -- GitLab From a262b78dd085dbe9b3c75dc1d9c4cd102b110b53 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 0359/2290] LoongArch: Disable IRQ before init_fn() for nonboot CPUs commit 1001db6c42e4012b55e5ee19405490f23e033b5a upstream. Disable IRQ before init_fn() for nonboot CPUs when hotplug, in order to silence such warnings (and also avoid potential errors due to unexpected interrupts): WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0 a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000 a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000 t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001 s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001 s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8 ra: 90000000047bd56c tlb_init+0x24c/0x528 ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000000 (PPLV0 -PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071000 (LIE=12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140 9000000006456888 9000000006456880 900000010039f950 0000000000000001 0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003 0000000000040000 9000000007930370 0000000002b90000 0000000000000004 9000000006366000 900000000619a140 0000000000000000 0000000000000004 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940 9000000001ca55f8 9000000006366000 90000000047a4828 00007ffff057ded8 00000000000000b0 0000000000000000 0000000000000000 0000000000071000 ... Call Trace: [<90000000047a4828>] show_stack+0x48/0x1a0 [<9000000005b61874>] dump_stack_lvl+0x84/0xcc [<90000000047f60ac>] __warn+0x8c/0x1e0 [<9000000005b0ab34>] report_bug+0x1b4/0x280 [<9000000005b63110>] do_bp+0x2d0/0x480 [<90000000047a2e20>] handle_bp+0x120/0x1c0 [<90000000048e3334>] rcu_cpu_starting+0x214/0x280 [<90000000047bd568>] tlb_init+0x248/0x528 [<90000000047a4c44>] per_cpu_trap_init+0x124/0x160 [<90000000047a19f4>] cpu_probe+0x494/0xa00 [<90000000047b551c>] start_secondary+0x3c/0xc0 [<9000000005b66134>] smpboot_entry+0x50/0x58 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index e0404df2c952f..18a2b37f4aea3 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -297,6 +297,7 @@ void play_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); -- GitLab From 47647795a630e74a2a727dfc0fe362b30cbf8905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 21 Feb 2024 08:33:24 +0100 Subject: [PATCH 0360/2290] drm/ttm: Fix an invalid freeing on already freed page in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40510a941d27d405a82dc3320823d875f94625df upstream. If caching mode change fails due to, for example, OOM we free the allocated pages in a two-step process. First the pages for which the caching change has already succeeded. Secondly the pages for which a caching change did not succeed. However the second step was incorrectly freeing the pages already freed in the first step. Fix. Signed-off-by: Thomas Hellström Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path") Cc: Christian König Cc: Dave Airlie Cc: Christian Koenig Cc: Huang Rui Cc: dri-devel@lists.freedesktop.org Cc: # v6.4+ Reviewed-by: Matthew Auld Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240221073324.3303-1-thomas.hellstrom@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 86affe987a1cb..393b97b4a991f 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -383,7 +383,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; -- GitLab From 65c5a1ba2c32dc90bf112c11151bf912d317906d Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 14 Feb 2024 16:06:28 +0100 Subject: [PATCH 0361/2290] s390/cio: fix invalid -EBUSY on ccw_device_start commit 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 upstream. The s390 common I/O layer (CIO) returns an unexpected -EBUSY return code when drivers try to start I/O while a path-verification (PV) process is pending. This can lead to failed device initialization attempts with symptoms like broken network connectivity after boot. Fix this by replacing the -EBUSY return code with a deferred condition code 1 reply to make path-verification handling consistent from a driver's point of view. The problem can be reproduced semi-regularly using the following process, while repeating steps 2-3 as necessary (example assumes an OSA device with bus-IDs 0.0.a000-0.0.a002 on CHPID 0.02): 1. echo 0.0.a000,0.0.a001,0.0.a002 >/sys/bus/ccwgroup/drivers/qeth/group 2. echo 0 > /sys/bus/ccwgroup/devices/0.0.a000/online 3. echo 1 > /sys/bus/ccwgroup/devices/0.0.a000/online ; \ echo on > /sys/devices/css0/chp0.02/status Background information: The common I/O layer starts path-verification I/Os when it receives indications about changes in a device path's availability. This occurs for example when hardware events indicate a change in channel-path status, or when a manual operation such as a CHPID vary or configure operation is performed. If a driver attempts to start I/O while a PV is running, CIO reports a successful I/O start (ccw_device_start() return code 0). Then, after completion of PV, CIO synthesizes an interrupt response that indicates an asynchronous status condition that prevented the start of the I/O (deferred condition code 1). If a PV indication arrives while a device is busy with driver-owned I/O, PV is delayed until after I/O completion was reported to the driver's interrupt handler. To ensure that PV can be started eventually, CIO reports a device busy condition (ccw_device_start() return code -EBUSY) if a driver tries to start another I/O while PV is pending. In some cases this -EBUSY return code causes device drivers to consider a device not operational, resulting in failed device initialization. Note: The code that introduced the problem was added in 2003. Symptoms started appearing with the following CIO commit that causes a PV indication when a device is removed from the cio_ignore list after the associated parent subchannel device was probed, but before online processing of the CCW device has started: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") During boot, the cio_ignore list is modified by the cio_ignore dracut module [1] as well as Linux vendor-specific systemd service scripts[2]. When combined, this commit and boot scripts cause a frequent occurrence of the problem during boot. [1] https://github.com/dracutdevs/dracut/tree/master/modules.d/81cio_ignore [2] https://github.com/SUSE/s390-tools/blob/master/cio_ignore.service Cc: stable@vger.kernel.org # v5.15+ Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Tested-By: Thorsten Winkler Reviewed-by: Thorsten Winkler Signed-off-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/device_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dadc6bb..a5dba3829769c 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) -- GitLab From 287abdcb9e2b0836716655908d8f4cccd0925da8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Jan 2024 20:51:22 +0900 Subject: [PATCH 0362/2290] ata: libata-core: Do not try to set sleeping devices to standby commit 4b085736e44dbbe69b5eea1a8a294f404678a1f4 upstream. In ata ata_dev_power_set_standby(), check that the target device is not sleeping. If it is, there is no need to do anything. Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fa2fc1953fc26..f14e56a5cff6b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2005,6 +2005,10 @@ void ata_dev_power_set_active(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping, do nothing. */ + if (dev->flags & ATA_DFLAG_SLEEPING) + return; + /* * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only * if supported by the device. -- GitLab From 5583552eec30eb58acac404948e50609b070cb98 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:31:11 +0100 Subject: [PATCH 0363/2290] dm-crypt: recheck the integrity tag after a failure commit 42e15d12070b4ff9af2b980f1b65774c2dab0507 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-crypt reports an authentication error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-crypt, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 89 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0e6068ee783e7..cac9b609c63a7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -61,6 +61,8 @@ struct convert_context { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -81,6 +83,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1365,10 +1369,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1724,6 +1731,8 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; @@ -1735,6 +1744,8 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_queue_read(struct dm_crypt_io *io); + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1748,6 +1759,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1783,15 +1803,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1812,6 +1836,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2074,6 +2114,14 @@ dec: static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2103,11 +2151,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2150,10 +2204,13 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -3079,7 +3136,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } -- GitLab From 6437b0b4ddcd258b400baec9d4601ba9e19e3faf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 5 Feb 2024 10:39:20 +0100 Subject: [PATCH 0364/2290] Revert "parisc: Only list existing CPUs in cpu_possible_mask" commit 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 upstream. This reverts commit 0921244f6f4f0d05698b953fe632a99b38907226. It broke CPU hotplugging because it modifies the __cpu_possible_mask after bootup, so that it will be different than nr_cpu_ids, which then effictively breaks the workqueue setup code and triggers crashes when shutting down CPUs at runtime. Guenter was the first who noticed the wrong values in __cpu_possible_mask, since the cpumask Kunit tests were failig. Reverting this commit fixes both issues, but sadly brings back this uncritical runtime warning: register_cpu_capacity_sysctl: too early to get CPU4 device! Signed-off-by: Helge Deller Reported-by: Guenter Roeck Link: https://lkml.org/lkml/2024/2/4/146 Link: https://lore.kernel.org/lkml/Zb0mbHlIud_bqftx@slm.duckdns.org/t/ Cc: stable@vger.kernel.org # 6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/processor.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 1f6c776d80813..c67883487ecd3 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -171,7 +171,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -466,13 +465,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } -- GitLab From 906414f4596469004632de29126c55751ed82c5e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:27:39 +0100 Subject: [PATCH 0365/2290] dm-integrity: recheck the integrity tag after a failure commit c88f5e553fe38b2ffc4c33d08654e5281b297677 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-integrity reports an error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-integrity, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 93 +++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 77fcff82c82ac..10b01b2adc679 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -279,6 +279,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -1699,6 +1701,79 @@ failed: get_random_bytes(result, ic->tag_size); } +static void integrity_recheck(struct dm_integrity_io *dio) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + struct page *page; + void *buffer; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + char *mem; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, + checksum_onstack); + r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1784,15 +1859,8 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); + integrity_recheck(dio); + goto skip_io; } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -4208,6 +4276,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4572,6 +4646,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); -- GitLab From e08c2a8d27e989f0f5b0888792643027d7e691e6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:30:10 +0100 Subject: [PATCH 0366/2290] dm-crypt: don't modify the data when using authenticated encryption commit 50c70240097ce41fe6bce6478b80478281e4d0f7 upstream. It was said that authenticated encryption could produce invalid tag when the data that is being encrypted is modified [1]. So, fix this problem by copying the data into the clone bio first and then encrypt them inside the clone bio. This may reduce performance, but it is needed to prevent the user from corrupting the device by writing data with O_DIRECT and modifying them at the same time. [1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cac9b609c63a7..3e215aa85b99a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2078,6 +2078,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); -- GitLab From 27c1ade6068fe4de9eee0fa1dc9393fd4b79246c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:28:09 +0100 Subject: [PATCH 0367/2290] dm-verity: recheck the hash after a failure commit 9177f3c0dea6143d05cac1bbd28668fd0e216d11 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-verity reports an error [1]. This commit fixes dm-verity, so that if hash verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the hash is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 86 ++++++++++++++++++++++++++++++++--- drivers/md/dm-verity.h | 6 +++ 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 4669923f4cfb4..3b0d0bcd6f0d6 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -474,6 +474,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -500,9 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -553,10 +608,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -578,6 +630,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -928,6 +984,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1364,6 +1424,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f3f6070084196..4620a98c99561 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include #include #include #include @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -84,6 +88,8 @@ struct dm_verity_io { struct work_struct work; + char *recheck_buffer; + /* * Three variably-size fields follow this struct: * -- GitLab From 031217128990d7f0ab8c46db1afb3cf1e075fd29 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 16 Feb 2024 17:01:13 +0100 Subject: [PATCH 0368/2290] cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window commit 0cab687205986491302cd2e440ef1d253031c221 upstream. The Linux CXL subsystem is built on the assumption that HPA == SPA. That is, the host physical address (HPA) the HDM decoder registers are programmed with are system physical addresses (SPA). During HDM decoder setup, the DVSEC CXL range registers (cxl-3.1, 8.1.3.8) are checked if the memory is enabled and the CXL range is in a HPA window that is described in a CFMWS structure of the CXL host bridge (cxl-3.1, 9.18.1.3). Now, if the HPA is not an SPA, the CXL range does not match a CFMWS window and the CXL memory range will be disabled then. The HDM decoder stops working which causes system memory being disabled and further a system hang during HDM decoder initialization, typically when a CXL enabled kernel boots. Prevent a system hang and do not disable the HDM decoder if the decoder's CXL range is not found in a CFMWS window. Note the change only fixes a hardware hang, but does not implement HPA/SPA translation. Support for this can be added in a follow on patch series. Signed-off-by: Robert Richter Fixes: 34e37b4c432c ("cxl/port: Enable HDM Capability after validating DVSEC Ranges") Cc: Link: https://lore.kernel.org/r/20240216160113.407141-1-rrichter@amd.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 003a44132418a..5584af15300a8 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -376,9 +376,9 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* -- GitLab From f49b20fd0134da84a6bd8108f9e73c077b7d6231 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 14 Feb 2024 23:43:56 +0900 Subject: [PATCH 0369/2290] scsi: target: pscsi: Fix bio_put() for error case commit de959094eb2197636f7c803af0943cb9d3b35804 upstream. As of commit 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper"), a bio allocated by bio_kmalloc() must be freed by bio_uninit() and kfree(). That is not done properly for the error case, hitting WARN and NULL pointer dereference in bio_free(). Fixes: 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Link: https://lore.kernel.org/r/20240214144356.101814-1-naohiro.aota@wdc.com Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pscsi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 69a4c9581e80e..7aec34c090661 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -910,12 +910,15 @@ new_bio: return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; -- GitLab From e3bf0a24e050538d3e4c8d8319b8e04e18188ce7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Feb 2024 17:14:11 -0500 Subject: [PATCH 0370/2290] scsi: core: Consult supported VPD page list prior to fetching page commit b5fc07a5fb56216a49e6c1d0b172d5464d99a89b upstream. Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") removed the logic which checks whether a VPD page is present on the supported pages list before asking for the page itself. That was done because SPC helpfully states "The Supported VPD Pages VPD page list may or may not include all the VPD pages that are able to be returned by the device server". Testing had revealed a few devices that supported some of the 0xBn pages but didn't actually list them in page 0. Julian Sikorski bisected a problem with his drive resetting during discovery to the commit above. As it turns out, this particular drive firmware will crash if we attempt to fetch page 0xB9. Various approaches were attempted to work around this. In the end, reinstating the logic that consults VPD page 0 before fetching any other page was the path of least resistance. A firmware update for the devices which originally compelled us to remove the check has since been released. Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") Cc: stable@vger.kernel.org Cc: Bart Van Assche Reported-by: Julian Sikorski Tested-by: Julian Sikorski Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi.c | 22 ++++++++++++++++++++-- include/scsi/scsi_device.h | 4 ---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3cda5d26b66ca..e70ab8db30142 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; + /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index fdc31fdb612da..d2751ed536df2 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; -- GitLab From 2dedda77d4493f3e92e414b272bfa60f1f51ed95 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: [PATCH 0371/2290] mm/swap: fix race when skipping swapcache commit 13ddaf26be324a7f951891ecd9ccd04466d27458 upstream. When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 5 +++++ mm/memory.c | 20 ++++++++++++++++++++ mm/swap.h | 5 +++++ mm/swapfile.c | 13 +++++++++++++ 4 files changed, 43 insertions(+) diff --git a/include/linux/swap.h b/include/linux/swap.h index a18cf4b7c724c..add47f43e568e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -571,6 +571,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/mm/memory.c b/mm/memory.c index fc8b264ec0cac..fb83cf56377ab 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3761,6 +3761,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3822,6 +3823,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4073,6 +4088,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4086,6 +4104,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/swap.h b/mm/swap.h index cc08c459c6190..5eff40ef76934 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -39,6 +39,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); @@ -98,6 +99,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 71db6d8a1ea30..cca9fda9d036f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3373,6 +3373,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); -- GitLab From 4c815c3a48349842cc43a4101f7387af8904a0fb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:25 -0800 Subject: [PATCH 0372/2290] mm/damon/lru_sort: fix quota status loss due to online tunings commit 13d0599ab3b2ff17f798353f24bcbef1659d3cfc upstream. For online parameters change, DAMON_LRU_SORT creates new schemes based on latest values of the parameters and replaces the old schemes with the new one. When creating it, the internal status of the quotas of the old schemes is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-3-sj@kernel.org Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Signed-off-by: SeongJae Park Cc: [6.0+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 63bdad20dbaf8..98a678129b067 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, -- GitLab From 19e5dc2e6bf7cb2160bbb996f18f3bfb4621e770 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 13 Feb 2024 03:16:34 -0500 Subject: [PATCH 0373/2290] mm: memcontrol: clarify swapaccount=0 deprecation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 118642d7f606fc9b9c92ee611275420320290ffb upstream. The swapaccount deprecation warning is throwing false positives. Since we deprecated the knob and defaulted to enabling, the only reports we've been getting are from folks that set swapaccount=1. While this is a nice affirmation that always-enabling was the right choice, we certainly don't want to warn when users request the supported mode. Only warn when disabling is requested, and clarify the warning. [colin.i.king@gmail.com: spelling: "commdandline" -> "commandline"] Link: https://lkml.kernel.org/r/20240215090544.1649201-1-colin.i.king@gmail.com Link: https://lkml.kernel.org/r/20240213081634.3652326-1-hannes@cmpxchg.org Fixes: b25806dcd3d5 ("mm: memcontrol: deprecate swapaccounting=0 mode") Signed-off-by: Colin Ian King Reported-by: "Jonas Schäfer" Reported-by: Narcis Garcia Suggested-by: Yosry Ahmed Signed-off-by: Johannes Weiner Reviewed-by: Yosry Ahmed Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9da98e3e71cfe..4570d3e315cf1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7517,9 +7517,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); -- GitLab From fff39f496265edd728457ac39030c52a2113aa13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:33:00 +0100 Subject: [PATCH 0374/2290] platform/x86: intel-vbtn: Stop calling "VBDL" from notify_handler commit 84c16d01ff219bc0a5dca5219db6b8b86a6854fb upstream. Commit 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") causes 2 issues on the ThinkPad X1 Tablet Gen2: 1. The ThinkPad will wake up immediately from suspend 2. When put in tablet mode SW_TABLET_MODE reverts to 0 after about 1 second Both these issues are caused by the "VBDL" ACPI method call added at the end of the notify_handler. And it never became entirely clear if this call is even necessary to fix the issue of missing tablet-mode-switch events on the Dell Inspiron 7352. Drop the "VBDL" ACPI method call again to fix the 2 issues this is causing on the ThinkPad X1 Tablet Gen2. Fixes: 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") Reported-by: Alexander Kobel Closes: https://lore.kernel.org/platform-driver-x86/295984ce-bd4b-49bd-adc5-ffe7c898d7f0@a-kobel.de/ Cc: regressions@lists.linux.dev Cc: Arnold Gozum Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Tested-by: Alexander Kobel Link: https://lore.kernel.org/r/20240216203300.245826-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel/vbtn.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 8e2b07ed2ce94..c10c99a31a90a 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* -- GitLab From 9e7fc40377ec28d15a46cf59c413616fe9a78d57 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2024 13:06:07 +0100 Subject: [PATCH 0375/2290] platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names commit dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 upstream. On some devices the ACPI name of the touchscreen is e.g. either MSSL1680:00 or MSSL1680:01 depending on the BIOS version. This happens for example on the "Chuwi Hi8 Air" tablet where the initial commit's ts_data uses "MSSL1680:00" but the tablets from the github issue and linux-hardware.org probe linked below both use "MSSL1680:01". Replace the strcmp() match on ts_data->acpi_name with a strstarts() check to allow using a partial match on just the ACPI HID of "MSSL1680" and change the ts_data->acpi_name for the "Chuwi Hi8 Air" accordingly to fix the touchscreen not working on models where it is "MSSL1680:01". Note this drops the length check for I2C_NAME_SIZE. This never was necessary since the ACPI names used are never more then 11 chars and I2C_NAME_SIZE is 20 so the replaced strncmp() would always stop long before reaching I2C_NAME_SIZE. Link: https://linux-hardware.org/?computer=AC4301C0542A Fixes: bbb97d728f77 ("platform/x86: touchscreen_dmi: Add info for the Chuwi Hi8 Air tablet") Closes: https://github.com/onitake/gsl-firmware/issues/91 Cc: stable@vger.kernel.org Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240212120608.30469-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/touchscreen_dmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 50ec19188a20d..11d72a3533552 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -50,7 +50,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -1776,7 +1776,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); -- GitLab From 8b218e2f0a27a9f09428b1847b4580640b9d1e58 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Sat, 17 Feb 2024 16:14:31 +0800 Subject: [PATCH 0376/2290] cachefiles: fix memory leak in cachefiles_add_cache() commit e21a2f17566cbd64926fb8f16323972f7a064444 upstream. The following memory leak was reported after unbinding /dev/cachefiles: ================================================================== unreferenced object 0xffff9b674176e3c0 (size 192): comm "cachefilesd2", pid 680, jiffies 4294881224 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc ea38a44b): [] kmem_cache_alloc+0x2d5/0x370 [] prepare_creds+0x26/0x2e0 [] cachefiles_determine_cache_security+0x1f/0x120 [] cachefiles_add_cache+0x13c/0x3a0 [] cachefiles_daemon_write+0x146/0x1c0 [] vfs_write+0xcb/0x520 [] ksys_write+0x69/0xf0 [] do_syscall_64+0x72/0x140 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 ================================================================== Put the reference count of cache_cred in cachefiles_daemon_unbind() to fix the problem. And also put cache_cred in cachefiles_add_cache() error branch to avoid memory leaks. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") CC: stable@vger.kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240217081431.796809-1-libaokun1@huawei.com Acked-by: David Howells Reviewed-by: Jingbo Xu Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/cache.c | 2 ++ fs/cachefiles/daemon.c | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6f474..f449f7340aad0 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ error_unsupported: dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index aa4efcabb5e37..5f4df9588620f 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); -- GitLab From 6b2ff10390b19a2364af622b6666b690443f9f3f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:51 +0800 Subject: [PATCH 0377/2290] md: Fix missing release of 'active_io' for flush commit 855678ed8534518e2b428bcbcec695de9ba248e8 upstream. submit_flushes atomic_set(&mddev->flush_pending, 1); rdev_for_each_rcu(rdev, mddev) atomic_inc(&mddev->flush_pending); bi->bi_end_io = md_end_flush submit_bio(bi); /* flush io is done first */ md_end_flush if (atomic_dec_and_test(&mddev->flush_pending)) percpu_ref_put(&mddev->active_io) -> active_io is not released if (atomic_dec_and_test(&mddev->flush_pending)) -> missing release of active_io For consequence, mddev_suspend() will wait for 'active_io' to be zero forever. Fix this problem by releasing 'active_io' in submit_flushes() if 'flush_pending' is decreased to zero. Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Blazej Kucman Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@linux.intel.com/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-7-yukuai1@huaweicloud.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index c7efe15229514..846bdee4daa0e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -564,8 +564,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) -- GitLab From 72fdbc728c339413f2fee7c042ebc124d6b223d2 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:32 +0000 Subject: [PATCH 0378/2290] KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler commit 85a71ee9a0700f6c18862ef3b0011ed9dad99aca upstream. It is possible that an LPI mapped in a different ITS gets unmapped while handling the MOVALL command. If that is the case, there is no state that can be migrated to the destination. Silently ignore it and continue migrating other LPIs. Cc: stable@vger.kernel.org Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 3c344e4cd4cad..f033b9b0363ff 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1427,6 +1427,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); -- GitLab From 3ac3624a74cf251cf7c32b6b29231d4696e9291d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:31 +0000 Subject: [PATCH 0379/2290] KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table() commit 8d3a7dfb801d157ac423261d7cd62c33e95375f8 upstream. vgic_get_irq() may not return a valid descriptor if there is no ITS that holds a valid translation for the specified INTID. If that is the case, it is safe to silently ignore it and continue processing the LPI pending table. Cc: stable@vger.kernel.org Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index f033b9b0363ff..092327665a6ef 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); -- GitLab From 3963f16cc7643b461271989b712329520374ad2a Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Wed, 14 Feb 2024 19:27:33 +0300 Subject: [PATCH 0380/2290] gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp() commit 136cfaca22567a03bbb3bf53a43d8cb5748b80ec upstream. The gtp_net_ops pernet operations structure for the subsystem must be registered before registering the generic netlink family. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp] Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86 df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74 RSP: 0018:ffff888014107220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? show_regs+0x90/0xa0 ? die_addr+0x50/0xd0 ? exc_general_protection+0x148/0x220 ? asm_exc_general_protection+0x22/0x30 ? gtp_genl_dump_pdp+0x1be/0x800 [gtp] ? __alloc_skb+0x1dd/0x350 ? __pfx___alloc_skb+0x10/0x10 genl_dumpit+0x11d/0x230 netlink_dump+0x5b9/0xce0 ? lockdep_hardirqs_on_prepare+0x253/0x430 ? __pfx_netlink_dump+0x10/0x10 ? kasan_save_track+0x10/0x40 ? __kasan_kmalloc+0x9b/0xa0 ? genl_start+0x675/0x970 __netlink_dump_start+0x6fc/0x9f0 genl_family_rcv_msg_dumpit+0x1bb/0x2d0 ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10 ? genl_op_from_small+0x2a/0x440 ? cap_capable+0x1d0/0x240 ? __pfx_genl_start+0x10/0x10 ? __pfx_genl_dumpit+0x10/0x10 ? __pfx_genl_done+0x10/0x10 ? security_capable+0x9d/0xe0 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index bace989591f75..937dd9cf4fbaf 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1906,20 +1906,20 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_pernet_subsys; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); error_out: -- GitLab From 62f361bfea60c6afc3df09c1ad4152e6507f6f47 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 30 Jan 2024 19:27:40 +0800 Subject: [PATCH 0381/2290] crypto: virtio/akcipher - Fix stack overflow on memcpy commit c0ec2a712daf133d9996a8a1b7ee2d4996080363 upstream. sizeof(struct virtio_crypto_akcipher_session_para) is less than sizeof(struct virtio_crypto_op_ctrl_req::u), copying more bytes from stack variable leads stack overflow. Clang reports this issue by commands: make -j CC=clang-14 mrproper >/dev/null 2>&1 make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1 make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/ virtio_crypto_akcipher_algs.o Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm") Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/ Cc: Signed-off-by: zhenwei pi Tested-by: Nathan Chancellor # build Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 168195672e2e1..d2df97cfcb294 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); -- GitLab From 75eaa3666e2cce5656e8f0564a845a5da5a9617a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 19 Feb 2024 18:58:06 +0000 Subject: [PATCH 0382/2290] irqchip/gic-v3-its: Do not assume vPE tables are preallocated commit ec4308ecfc887128a468f03fb66b767559c57c23 upstream. The GIC/ITS code is designed to ensure to pick up any preallocated LPI tables on the redistributors, as enabling LPIs is a one-way switch. There is no such restriction for vLPIs, and for GICv4.1 it is expected to allocate a new vPE table at boot. This works as intended when initializing an ITS, however when setting up a redistributor in cpu_init_lpis() the early return for preallocated RD tables skips straight past the GICv4 setup. This all comes to a head when trying to kexec() into a new kernel, as the new kernel silently fails to set up GICv4, leading to a complete loss of SGIs and LPIs for KVM VMs. Slap a band-aid on the problem by ensuring its_cpu_init_lpis() always initializes GICv4 on the way out, even if the other RD tables were preallocated. Fixes: 6479450f72c1 ("irqchip/gic-v4: Fix occasional VLPI drop") Reported-by: George Cherian Co-developed-by: Marc Zyngier Signed-off-by: Marc Zyngier Signed-off-by: Oliver Upton Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240219185809.286724-2-oliver.upton@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index b83b39e93e1a9..4d03fb3a82460 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3161,6 +3161,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3196,7 +3197,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), -- GitLab From e90211b1f7ace2a57838343c2a2847cecebead7d Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 31 Jan 2024 09:19:33 +0100 Subject: [PATCH 0383/2290] irqchip/sifive-plic: Enable interrupt if needed before EOI commit 9c92006b896c767218aabe8947b62026a571cfd0 upstream. RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as explained in the description of Interrupt Completion in the PLIC spec: "The PLIC signals it has completed executing an interrupt handler by writing the interrupt ID it received from the claim to the claim/complete register. The PLIC does not check whether the completion ID is the same as the last claim ID for that target. If the completion ID does not match an interrupt source that *is currently enabled* for the target, the completion is silently ignored." Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked") ensured that EOI is successful by enabling interrupt first, before EOI. Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") removed the interrupt enabling code from the previous commit, because it assumes that interrupt should already be enabled at the point of EOI. However, this is incorrect: there is a window after a hart claiming an interrupt and before irq_desc->lock getting acquired, interrupt can be disabled during this window. Thus, EOI can be invoked while the interrupt is disabled, effectively nullify this EOI. This results in the interrupt never gets asserted again, and the device who uses this interrupt appears frozen. Make sure that interrupt is really enabled before EOI. Fixes: a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Samuel Holland Cc: Marc Zyngier Cc: Guo Ren Cc: linux-riscv@lists.infradead.org Cc: Link: https://lore.kernel.org/r/20240131081933.144512-1-namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-sifive-plic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 2f4784860df5d..be5e19a86ac3b 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -144,7 +144,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP -- GitLab From 2a19e0042bf14c9f5386b71d17089e2c6fc7dded Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 15 Jan 2024 19:26:49 +0530 Subject: [PATCH 0384/2290] PCI/MSI: Prevent MSI hardware interrupt number truncation commit db744ddd59be798c2627efbfc71f707f5a935a40 upstream. While calculating the hardware interrupt number for a MSI interrupt, the higher bits (i.e. from bit-5 onwards a.k.a domain_nr >= 32) of the PCI domain number gets truncated because of the shifted value casting to return type of pci_domain_nr() which is 'int'. This for example is resulting in same hardware interrupt number for devices 0019:00:00.0 and 0039:00:00.0. To address this cast the PCI domain number to 'irq_hw_number_t' before left shifting it to calculate the hardware interrupt number. Please note that this fixes the issue only on 64-bit systems and doesn't change the behavior for 32-bit systems i.e. the 32-bit systems continue to have the issue. Since the issue surfaces only if there are too many PCIe controllers in the system which usually is the case in modern server systems and they don't tend to run 32-bit kernels. Fixes: 3878eaefb89a ("PCI/MSI: Enhance core to support hierarchy irqdomain") Signed-off-by: Vidya Sagar Signed-off-by: Thomas Gleixner Tested-by: Shanker Donthineni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115135649.708536-1-vidyas@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index e9cf318e6670f..34877a1f43a15 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -60,7 +60,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc) -- GitLab From 13cd1daeea848614e585b2c6ecc11ca9c8ab2500 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 20 Feb 2024 12:21:56 +0000 Subject: [PATCH 0385/2290] l2tp: pass correct message length to ip6_append_data commit 359e54a93ab43d32ee1bff3c2f9f10cb9f6b6e79 upstream. l2tp_ip6_sendmsg needs to avoid accounting for the transport header twice when splicing more data into an already partially-occupied skbuff. To manage this, we check whether the skbuff contains data using skb_queue_empty when deciding how much data to append using ip6_append_data. However, the code which performed the calculation was incorrect: ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; ...due to C operator precedence, this ends up setting ulen to transhdrlen for messages with a non-zero length, which results in corrupted packets on the wire. Add parentheses to correct the calculation in line with the original intent. Fixes: 9d4c75800f61 ("ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data()") Cc: David Howells Cc: stable@vger.kernel.org Signed-off-by: Tom Parkin Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220122156.43131-1-tparkin@katalix.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 314ec3a51e8de..bb92dc8b82f39 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, -- GitLab From 786f089086b505372fb3f4f008d57e7845fff0d8 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 5 Feb 2024 11:23:34 +0100 Subject: [PATCH 0386/2290] ARM: ep93xx: Add terminator to gpiod_lookup_table commit fdf87a0dc26d0550c60edc911cda42f9afec3557 upstream. Without the terminator, if a con_id is passed to gpio_find() that does not exist in the lookup table the function will not stop looping correctly, and eventually cause an oops. Cc: stable@vger.kernel.org Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors") Reported-by: Andy Shevchenko Signed-off-by: Nikita Shubin Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240205102337.439002-1-alexander.sverdlin@gmail.com Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-ep93xx/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 95e731676cea4..961daac653261 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; -- GitLab From 1221b8ea25cc8db5096fa328b835b8b6fdb6180b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:11:25 +0200 Subject: [PATCH 0387/2290] x86/returnthunk: Allow different return thunks Upstream commit: 770ae1b709528a6a173b5c7b183818ee9b45e376 In preparation for call depth tracking on Intel SKL CPUs, make it possible to patch in a SKL specific return thunk. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111147.680469665@infradead.org Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/kernel/alternative.c | 17 +++++++++++++---- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- 5 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2f123d4fb85b5..04035edc5c758 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -222,6 +222,12 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_CALL_THUNKS +extern void (*x86_return_thunk)(void); +#else +#define x86_return_thunk (&__x86_return_thunk) +#endif + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6b8c93989aa31..5c11f688a1011 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -536,6 +536,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } #ifdef CONFIG_RETHUNK + +#ifdef CONFIG_CALL_THUNKS +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; +#endif + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -551,14 +556,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - return -1; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (x86_return_thunk == __x86_return_thunk) + return -1; - bytes[i++] = RET_INSN_OPCODE; + i = JMP32_INSN_SIZE; + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); + } else { + bytes[i++] = RET_INSN_OPCODE; + } for (; i < insn->length;) bytes[i++] = INT3_INSN_OPCODE; - return i; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e07234ec7e237..ec51ce713dea4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -361,7 +361,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 3fbb491688275..b32134b093ec8 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -80,7 +80,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case RET: if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69aee6245e4a..7913440c0fd46 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip) u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - emit_jump(&prog, &__x86_return_thunk, ip); + emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if (IS_ENABLED(CONFIG_SLS)) -- GitLab From b012dcf39d9e79f9a71aa72b485c740aba84b47f Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 24 Feb 2024 12:01:34 +0100 Subject: [PATCH 0388/2290] Revert "x86/alternative: Make custom return thunk unconditional" This reverts commit 53ebbe1c8c02aa7b7f072dd2f96bca4faa1daa59. Revert the backport of upstream commit: 095b8303f383 ("x86/alternative: Make custom return thunk unconditional") in order to backport the full version now that 770ae1b70952 ("x86/returnthunk: Allow different return thunks") has been backported. Revert it here so that the build breakage is kept at minimum. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 ---- arch/x86/kernel/cpu/bugs.c | 2 -- 2 files changed, 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 04035edc5c758..44c78af98af4b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -205,11 +205,7 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; -#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); -#else -static inline void __x86_return_thunk(void) {} -#endif extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 13dffc43ded02..208e4938f35ba 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,8 +62,6 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); static DEFINE_MUTEX(spec_ctrl_mutex); -void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; - /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { -- GitLab From 545a94ffc29a4afb3bec7cbb4b830cc9c35fad7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:30 +0200 Subject: [PATCH 0389/2290] x86/alternative: Make custom return thunk unconditional Upstream commit: 095b8303f3835c68ac4a8b6d754ca1c3b6230711 There is infrastructure to rewrite return thunks to point to any random thunk one desires, unwrap that from CALL_THUNKS, which up to now was the sole user of that. [ bp: Make the thunks visible on 32-bit and add ifdeffery for the 32-bit builds. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.775293785@infradead.org Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 8 ++++---- arch/x86/kernel/alternative.c | 4 ---- arch/x86/kernel/cpu/bugs.c | 2 ++ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 44c78af98af4b..d3706de91a934 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -205,7 +205,11 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; +#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); +#else +static inline void __x86_return_thunk(void) {} +#endif extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); @@ -218,11 +222,7 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); -#ifdef CONFIG_CALL_THUNKS extern void (*x86_return_thunk)(void); -#else -#define x86_return_thunk (&__x86_return_thunk) -#endif #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5c11f688a1011..69f85e2746119 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -537,10 +537,6 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_RETHUNK -#ifdef CONFIG_CALL_THUNKS -void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; -#endif - /* * Rewrite the compiler generated return thunk tail-calls. * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 208e4938f35ba..13dffc43ded02 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,6 +62,8 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); static DEFINE_MUTEX(spec_ctrl_mutex); +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { -- GitLab From 943c8b1fcc86210ac7a62cbba77360936554477c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 24 Feb 2024 14:48:03 +0100 Subject: [PATCH 0390/2290] dm-integrity, dm-verity: reduce stack usage for recheck commit 66ad2fbcdbeab0edfd40c5d94f32f053b98c2320 upstream. The newly added integrity_recheck() function has another larger stack allocation, just like its caller integrity_metadata(). When it gets inlined, the combination of the two exceeds the warning limit for 32-bit architectures and possibly risks an overflow when this is called from a deep call chain through a file system: drivers/md/dm-integrity.c:1767:13: error: stack frame size (1048) exceeds limit (1024) in 'integrity_metadata' [-Werror,-Wframe-larger-than] 1767 | static void integrity_metadata(struct work_struct *w) Since the caller at this point is done using its checksum buffer, just reuse the same buffer in the new function to avoid the double allocation. [Mikulas: add "noinline" to integrity_recheck and verity_recheck. These functions are only called on error, so they shouldn't bloat the stack frame or code size of the caller.] Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Fixes: 9177f3c0dea6 ("dm-verity: recheck the hash after a failure") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 10 ++++------ drivers/md/dm-verity-target.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 10b01b2adc679..3da4359f51645 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1701,14 +1701,13 @@ failed: get_random_bytes(result, ic->tag_size); } -static void integrity_recheck(struct dm_integrity_io *dio) +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) { struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); struct dm_integrity_c *ic = dio->ic; struct bvec_iter iter; struct bio_vec bv; sector_t sector, logical_sector, area, offset; - char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; struct page *page; void *buffer; @@ -1744,9 +1743,8 @@ static void integrity_recheck(struct dm_integrity_io *dio) goto free_ret; } - integrity_sector_checksum(ic, logical_sector, buffer, - checksum_onstack); - r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, &dio->metadata_offset, ic->tag_size, TAG_CMP); if (r) { if (r > 0) { @@ -1859,7 +1857,7 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - integrity_recheck(dio); + integrity_recheck(dio, checksums); goto skip_io; } if (likely(checksums != checksums_onstack)) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 3b0d0bcd6f0d6..b48e1b59e6da4 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -483,8 +483,8 @@ static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, return 0; } -static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter start, sector_t cur_block) +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) { struct page *page; void *buffer; -- GitLab From d9d24262535360fceebacaf93eaba281d4135c52 Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Wed, 21 Feb 2024 13:03:47 -0800 Subject: [PATCH 0391/2290] erofs: fix refcount on the metabuf used for inode lookup commit 56ee7db31187dc36d501622cb5f1415e88e01c2a upstream. In erofs_find_target_block() when erofs_dirnamecmp() returns 0, we do not assign the target metabuf. This causes the caller erofs_namei()'s erofs_put_metabuf() at the end to be not effective leaving the refcount on the page. As the page from metabuf (buf->page) is never put, such page cannot be migrated or reclaimed. Fix it now by putting the metabuf from previous loop and assigning the current metabuf to target before returning so caller erofs_namei() can do the final put as it was intended. Fixes: 500edd095648 ("erofs: use meta buffers for inode lookup") Cc: # 5.18+ Signed-off-by: Sandeep Dhavale Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240221210348.3667795-1-dhavale@google.com Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- fs/erofs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 0dc34721080c7..e8ccaa761bd63 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -137,24 +137,24 @@ static void *find_target_block_classic(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; + } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ -- GitLab From d4c7e4b1b0249598d2093d09be5a286f81fc4a36 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 16 Feb 2024 23:47:08 +0100 Subject: [PATCH 0392/2290] serial: amba-pl011: Fix DMA transmission in RS485 mode commit 3b69e32e151bc4a4e3c785cbdb1f918d5ee337ed upstream. When DMA is used in RS485 mode make sure that the UARTs tx section is enabled before the DMA buffers are queued for transmission. Cc: stable@vger.kernel.org Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20240216224709.9928-2-l.sanfilippo@kunbus.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index c74eaf2552c32..2f0f05259778a 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1345,11 +1345,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1431,42 +1461,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; -- GitLab From 4dc87908b1836ac7194fd4bd2c89ff3d79ebc075 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 16 Feb 2024 00:41:02 +0000 Subject: [PATCH 0393/2290] usb: dwc3: gadget: Don't disconnect if not started commit b191a18cb5c47109ca696370a74a5062a70adfd0 upstream. Don't go through soft-disconnection sequence if the controller hasn't started. Otherwise, there will be timeout and warning reports from the soft-disconnection flow. Cc: stable@vger.kernel.org Fixes: 61a348857e86 ("usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-usb/20240215233536.7yejlj3zzkl23vjd@synopsys.com/T/#mb0661cd5f9272602af390c18392b9a36da4f96e6 Tested-by: Marek Szyprowski Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/e3be9b929934e0680a6f4b8f6eb11b18ae9c7e07.1708043922.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 576c21bf77cda..b134110cc2ed5 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2548,6 +2548,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* -- GitLab From 748cee4417f68b09ca8dbf8b702803ad2d082010 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 6 Feb 2024 11:40:18 +0100 Subject: [PATCH 0394/2290] usb: cdnsp: blocked some cdns3 specific code commit 18a6be674306c9acb05c08e5c3fd376ef50a917c upstream. host.c file has some parts of code that were introduced for CDNS3 driver and should not be used with CDNSP driver. This patch blocks using these parts of codes by CDNSP driver. These elements include: - xhci_plat_cdns3_xhci object - cdns3 specific XECP_PORT_CAP_REG register - cdns3 specific XECP_AUX_CTRL_REG1 register cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240206104018.48272-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/host.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c96a49..ceca4d839dfd4 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; -- GitLab From c66a8008489bddef8d9bab3545a7d868d95a8b38 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 15 Feb 2024 13:16:09 +0100 Subject: [PATCH 0395/2290] usb: cdnsp: fixed issue with incorrect detecting CDNSP family controllers commit 47625b018c6bc788bc10dd654c82696eb0a5ef11 upstream. Cadence have several controllers from 0x000403xx family but current driver suuport detecting only one with DID equal 0x0004034E. It causes that if someone uses different CDNSP controller then driver will use incorrect version and register space. Patch fix this issue. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240215121609.259772-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 1 - drivers/usb/cdns3/drd.c | 13 +++++++++---- drivers/usb/cdns3/drd.h | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 7b20d2d5c262e..7242591b346bc 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -394,7 +394,6 @@ pm_put: return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index d00ff98dffabf..33ba30f79b337 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -418,15 +419,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -479,7 +485,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73ed91..d72370c321d39 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. -- GitLab From 2134e9906e17b1e5284300fab547869ebacfd7d9 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:16 -0500 Subject: [PATCH 0396/2290] usb: cdns3: fixed memory use after free at cdns3_gadget_ep_disable() commit cd45f99034b0c8c9cb346dd0d6407a95ca3d36f6 upstream. ... cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); list_del_init(&priv_req->list); ... 'priv_req' actually free at cdns3_gadget_ep_free_request(). But list_del_init() use priv_req->list after it. [ 1542.642868][ T534] BUG: KFENCE: use-after-free read in __list_del_entry_valid+0x10/0xd4 [ 1542.642868][ T534] [ 1542.653162][ T534] Use-after-free read at 0x000000009ed0ba99 (in kfence-#3): [ 1542.660311][ T534] __list_del_entry_valid+0x10/0xd4 [ 1542.665375][ T534] cdns3_gadget_ep_disable+0x1f8/0x388 [cdns3] [ 1542.671571][ T534] usb_ep_disable+0x44/0xe4 [ 1542.675948][ T534] ffs_func_eps_disable+0x64/0xc8 [ 1542.680839][ T534] ffs_func_set_alt+0x74/0x368 [ 1542.685478][ T534] ffs_func_disable+0x18/0x28 Move list_del_init() before cdns3_gadget_ep_free_request() to resolve this problem. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index ccdd525bd7c80..fb7d9d35cf625 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2537,11 +2537,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } -- GitLab From 9a52b694b066f299d8b9800854a8503457a8b64c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:17 -0500 Subject: [PATCH 0397/2290] usb: cdns3: fix memory double free when handle zero packet commit 5fd9e45f1ebcd57181358af28506e8a661a260b3 upstream. 829 if (request->complete) { 830 spin_unlock(&priv_dev->lock); 831 usb_gadget_giveback_request(&priv_ep->endpoint, 832 request); 833 spin_lock(&priv_dev->lock); 834 } 835 836 if (request->buf == priv_dev->zlp_buf) 837 cdns3_gadget_ep_free_request(&priv_ep->endpoint, request); Driver append an additional zero packet request when queue a packet, which length mod max packet size is 0. When transfer complete, run to line 831, usb_gadget_giveback_request() will free this requestion. 836 condition is true, so cdns3_gadget_ep_free_request() free this request again. Log: [ 1920.140696][ T150] BUG: KFENCE: use-after-free read in cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.140696][ T150] [ 1920.151837][ T150] Use-after-free read at 0x000000003d1cd10b (in kfence-#36): [ 1920.159082][ T150] cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.164988][ T150] cdns3_transfer_completed+0x438/0x5f8 [cdns3] Add check at line 829, skip call usb_gadget_giveback_request() if it is additional zero length packet request. Needn't call usb_gadget_giveback_request() because it is allocated in this driver. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-2-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index fb7d9d35cf625..2b8f98f0707e7 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -826,7 +826,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); -- GitLab From 35b604a37ec70d68b19dafd10bbacf1db505c9ca Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 5 Feb 2024 13:16:50 +0530 Subject: [PATCH 0398/2290] usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76c51146820c5dac629f21deafab0a7039bc3ccd upstream. It is observed sometimes when tethering is used over NCM with Windows 11 as host, at some instances, the gadget_giveback has one byte appended at the end of a proper NTB. When the NTB is parsed, unwrap call looks for any leftover bytes in SKB provided by u_ether and if there are any pending bytes, it treats them as a separate NTB and parses it. But in case the second NTB (as per unwrap call) is faulty/corrupt, all the datagrams that were parsed properly in the first NTB and saved in rx_list are dropped. Adding a few custom traces showed the following: [002] d..1 7828.532866: dwc3_gadget_giveback: ep1out: req 000000003868811a length 1025/16384 zsI ==> 0 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb toprocess: 1025 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb seq: 0xce67 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x400 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb ndp_len: 0x10 [002] d..1 7828.532869: ncm_unwrap_ntb: K: Parsed NTB with 1 frames In this case, the giveback is of 1025 bytes and block length is 1024. The rest 1 byte (which is 0x00) won't be parsed resulting in drop of all datagrams in rx_list. Same is case with packets of size 2048: [002] d..1 7828.557948: dwc3_gadget_giveback: ep1out: req 0000000011dfd96e length 2049/16384 zsI ==> 0 [002] d..1 7828.557949: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.557950: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x800 Lecroy shows one byte coming in extra confirming that the byte is coming in from PC: Transfer 2959 - Bytes Transferred(1025) Timestamp((18.524 843 590) - Transaction 8391 - Data(1025 bytes) Timestamp(18.524 843 590) --- Packet 4063861 Data(1024 bytes) Duration(2.117us) Idle(14.700ns) Timestamp(18.524 843 590) --- Packet 4063863 Data(1 byte) Duration(66.160ns) Time(282.000ns) Timestamp(18.524 845 722) According to Windows driver, no ZLP is needed if wBlockLength is non-zero, because the non-zero wBlockLength has already told the function side the size of transfer to be expected. However, there are in-market NCM devices that rely on ZLP as long as the wBlockLength is multiple of wMaxPacketSize. To deal with such devices, it pads an extra 0 at end so the transfer is no longer multiple of wMaxPacketSize. Cc: Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240205074650.200304-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bbb6ff6b11aa1..5e78fcc63e4d3 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1340,7 +1340,15 @@ parse_ntb: "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if (to_process > 0) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } -- GitLab From 0158216805ca7e498d07de38840d2732166ae5fa Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:38 +0800 Subject: [PATCH 0399/2290] usb: roles: fix NULL pointer issue when put module's reference commit 1c9be13846c0b2abc2480602f8ef421360e1ad9e upstream. In current design, usb role class driver will get usb_role_switch parent's module reference after the user get usb_role_switch device and put the reference after the user put the usb_role_switch device. However, the parent device of usb_role_switch may be removed before the user put the usb_role_switch. If so, then, NULL pointer issue will be met when the user put the parent module's reference. This will save the module pointer in structure of usb_role_switch. Then, we don't need to find module by iterating long relations. Fixes: 5c54fcac9a9d ("usb: roles: Take care of driver module reference counting") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 32e6d19f7011a..d90d164238099 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -19,6 +19,7 @@ static struct class *role_class; struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; /* From descriptor */ @@ -133,7 +134,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -155,7 +156,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -170,7 +171,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -187,15 +188,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -337,6 +341,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = role_class; -- GitLab From 2f414a56b369129cfdac6ecaf9899f9807a264aa Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:39 +0800 Subject: [PATCH 0400/2290] usb: roles: don't get/set_role() when usb_role_switch is unregistered commit b787a3e781759026a6212736ef8e52cf83d1821a upstream. There is a possibility that usb_role_switch device is unregistered before the user put usb_role_switch. In this case, the user may still want to get/set_role() since the user can't sense the changes of usb_role_switch. This will add a flag to show if usb_role_switch is already registered and avoid unwanted behaviors. Fixes: fde0aa6c175a ("usb: common: Small class for USB role switches") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index d90d164238099..a327f8bc57043 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -21,6 +21,7 @@ struct usb_role_switch { struct mutex lock; /* device lock*/ struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -47,6 +48,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -72,7 +76,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -356,6 +360,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -370,8 +376,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); -- GitLab From 42a841a84ffd84f8166b9b4c5897a5f30feb7e1c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 14 Apr 2023 17:47:06 +0200 Subject: [PATCH 0401/2290] mptcp: make userspace_pm_append_new_local_addr static commit aa5887dca2d236fc50000e27023d4d78dce3af30 upstream. mptcp_userspace_pm_append_new_local_addr() has always exclusively been used in pm_userspace.c since its introduction in commit 4638de5aefe5 ("mptcp: handle local addrs announced by userspace PMs"). So make it static. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 4 ++-- net/mptcp/protocol.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2e1e0d0e3ec60..4acd31f84d213 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -25,8 +25,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } } -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) +static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 259672cc344f3..b092205213234 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -834,8 +834,6 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); -- GitLab From 9e8e59af3a4aad2494e0ea23c8bda2433a338349 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:28 +0100 Subject: [PATCH 0402/2290] mptcp: add needs_id for userspace appending addr commit 6c347be62ae963b301ead8e7fa7b9973e6e0d6e1 upstream. When userspace PM requires to create an ID 0 subflow in "userspace pm create id 0 subflow" test like this: userspace_pm_add_sf $ns2 10.0.3.2 0 An ID 1 subflow, in fact, is created. Since in mptcp_pm_nl_append_new_local_addr(), 'id 0' will be treated as no ID is set by userspace, and will allocate a new ID immediately: if (!e->addr.id) e->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); To solve this issue, a new parameter needs_id is added for mptcp_userspace_pm_append_new_local_addr() to distinguish between whether userspace PM has set an ID 0 or whether userspace PM has not set any address. needs_id is true in mptcp_userspace_pm_get_local_id(), but false in mptcp_pm_nl_announce_doit() and mptcp_pm_nl_subflow_create_doit(). Fixes: e5ed101a6028 ("mptcp: userspace pm allow creating id 0 subflow") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 4acd31f84d213..2895be3046f79 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -155,7 +156,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) @@ -197,7 +198,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -335,7 +336,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; -- GitLab From 71787c665d09a970b9280c285181d3a2d1bf3bb0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:30 +0100 Subject: [PATCH 0403/2290] mptcp: fix lockless access in subflow ULP diag commit b8adb69a7d29c2d33eb327bca66476fb6066516b upstream. Since the introduction of the subflow ULP diag interface, the dump callback accessed all the subflow data with lockless. We need either to annotate all the read and write operation accordingly, or acquire the subflow socket lock. Let's do latter, even if slower, to avoid a diffstat havoc. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/mptcp/diag.c | 6 +++++- net/tls/tls_main.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c838f7290dd9..8ea1fba84eff9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2290,7 +2290,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f28..e57c5f47f0351 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -69,11 +71,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 80b42a3e78830..6b7189a520af7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1098,7 +1098,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; -- GitLab From a6cada89ee5af2194f3ae62bebe4a821eaebe690 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Mon, 29 Jan 2024 17:33:40 -0500 Subject: [PATCH 0404/2290] Revert "drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz" commit a538dabf772c169641e151834e161e241802ab33 upstream. [why]: This reverts commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a. The commit caused corruption when running some applications in fullscreen Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Acked-by: Aurabindo Pillai Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index baecc0ffe7580..85e0d1c2a9085 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2123,7 +2123,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; -- GitLab From f0d857ce31a6bc7a82afcdbadb8f7417d482604b Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 12 Jan 2024 16:55:23 +0800 Subject: [PATCH 0405/2290] IB/hfi1: Fix a memleak in init_credit_return [ Upstream commit 809aa64ebff51eb170ee31a95f83b2d21efa32e2 ] When dma_alloc_coherent fails to allocate dd->cr_base[i].va, init_credit_return should deallocate dd->cr_base and dd->cr_base[i] that allocated before. Or those resources would be never freed and a memleak is triggered. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Zhipeng Lu Link: https://lore.kernel.org/r/20240112085523.3731720-1-alexious@zju.edu.cn Acked-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/pio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 51ae58c02b15c..802b0e5801a7d 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2089,7 +2089,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,6 +2097,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) -- GitLab From 75a64c641cf2c694556e8113e475235d7994c15e Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:36 -0800 Subject: [PATCH 0406/2290] RDMA/bnxt_re: Return error for SRQ resize [ Upstream commit 3687b450c5f32e80f179ce4b09e0454da1449eac ] SRQ resize is not supported in the driver. But driver is not returning error from bnxt_re_modify_srq() for SRQ resize. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4ed8814efde6f..6ed0568747eaa 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1710,7 +1710,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1725,13 +1725,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) -- GitLab From b2e4a5266e3d133b4c7f0e43bf40d13ce14fd1aa Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 31 Jan 2024 17:38:46 -0600 Subject: [PATCH 0407/2290] RDMA/irdma: Fix KASAN issue with tasklet [ Upstream commit bd97cea7b18a0a553773af806dfbfac27a7c4acb ] KASAN testing revealed the following issue assocated with freeing an IRQ. [50006.466686] Call Trace: [50006.466691] [50006.489538] dump_stack+0x5c/0x80 [50006.493475] print_address_description.constprop.6+0x1a/0x150 [50006.499872] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.505742] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.511644] kasan_report.cold.11+0x7f/0x118 [50006.516572] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.522473] irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.528232] irdma_process_ceq+0xb2/0x400 [irdma] [50006.533601] ? irdma_hw_flush_wqes_callback+0x370/0x370 [irdma] [50006.540298] irdma_ceq_dpc+0x44/0x100 [irdma] [50006.545306] tasklet_action_common.isra.14+0x148/0x2c0 [50006.551096] __do_softirq+0x1d0/0xaf8 [50006.555396] irq_exit_rcu+0x219/0x260 [50006.559670] irq_exit+0xa/0x20 [50006.563320] smp_apic_timer_interrupt+0x1bf/0x690 [50006.568645] apic_timer_interrupt+0xf/0x20 [50006.573341] The issue is that a tasklet could be pending on another core racing the delete of the irq. Fix by insuring any scheduled tasklet is killed after deleting the irq. Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-2-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 311a1138e838d..12bd38b1e2c15 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -562,6 +562,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** -- GitLab From 429999729d4a85126cec4a1305db67d9a7774545 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 31 Jan 2024 17:38:47 -0600 Subject: [PATCH 0408/2290] RDMA/irdma: Validate max_send_wr and max_recv_wr [ Upstream commit ee107186bcfd25d7873258f3f75440e20f5e6416 ] Validate that max_send_wr and max_recv_wr is within the supported range. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Change-Id: I2fc8b10292b641fddd20b36986a9dae90a93f4be Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-3-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 01faec6ea5285..0fbc39df1e7d4 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -762,7 +762,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { -- GitLab From 6f4553096eceefe5b41e4c1bc03d9892cb903284 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:48 -0600 Subject: [PATCH 0409/2290] RDMA/irdma: Set the CQ read threshold for GEN 1 [ Upstream commit 666047f3ece9f991774c1fe9b223139a9ef8908d ] The CQ shadow read threshold is currently not set for GEN 2. This could cause an invalid CQ overflow condition, so remove the GEN check that exclused GEN 1. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-4-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 0fbc39df1e7d4..42c671f209233 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2121,9 +2121,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); -- GitLab From edc2a9afbebda04a43dd9bb4217b1becce0efc03 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:49 -0600 Subject: [PATCH 0410/2290] RDMA/irdma: Add AE for too many RNRS [ Upstream commit 630bdb6f28ca9e5ff79e244030170ac788478332 ] Add IRDMA_AE_LLP_TOO_MANY_RNRS to the list of AE's processed as an abnormal asyncronous event. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-5-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/defs.h | 1 + drivers/infiniband/hw/irdma/hw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index ad54260cb58c9..ebe98fa2b1cd2 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -345,6 +345,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 12bd38b1e2c15..918a2d783141f 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -379,6 +379,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: -- GitLab From aee4dcfe17219fe60f2821923adea98549060af8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 4 Feb 2024 16:42:07 -0800 Subject: [PATCH 0411/2290] RDMA/srpt: Support specifying the srpt_service_guid parameter [ Upstream commit fdfa083549de5d50ebf7f6811f33757781e838c0 ] Make loading ib_srpt with this parameter set work. The current behavior is that setting that parameter while loading the ib_srpt kernel module triggers the following kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: parse_one+0x18c/0x1d0 parse_args+0xe1/0x230 load_module+0x8de/0xa60 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x181/0x240 __x64_sys_finit_module+0x5a/0xb0 do_syscall_64+0x5f/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Cc: LiHonggang Reported-by: LiHonggang Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240205004207.17031-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 25e799dba999e..4607d37b9224a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); -- GitLab From 8d3a5cbc1e92a0da912578afa44506679339aaf3 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:07 +0000 Subject: [PATCH 0412/2290] iommufd/iova_bitmap: Bounds check mapped::pages access [ Upstream commit a4ab7dedaee0e39b15653c5fd0367e420739f7ef ] Dirty IOMMU hugepages reported on a base page page-size granularity can lead to an attempt to set dirty pages in the bitmap beyond the limits that are pinned. Bounds check the page index of the array we are trying to access is within the limits before we kmap() and return otherwise. While it is also a defensive check, this is also in preparation to defer setting bits (outside the mapped range) to the next iteration(s) when the pages become available. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-2-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 0f19d502f351b..5b540a164c98f 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -405,6 +405,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -413,6 +414,9 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); -- GitLab From c5bc02f60d278637101837c3c295021efeca74d9 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:08 +0000 Subject: [PATCH 0413/2290] iommufd/iova_bitmap: Switch iova_bitmap::bitmap to an u8 array [ Upstream commit d18411ec305728c6371806c4fb09be07016aad0b ] iova_bitmap_mapped_length() don't deal correctly with the small bitmaps (< 2M bitmaps) when the starting address isn't u64 aligned, leading to skipping a tiny part of the IOVA range. This is materialized as not marking data dirty that should otherwise have been. Fix that by using a u8 * in the internal state of IOVA bitmap. Most of the data structures use the type of the bitmap to adjust its indexes, thus changing the type of the bitmap decreases the granularity of the bitmap indexes. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-3-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 5b540a164c98f..c748a1e3ba53a 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -99,7 +99,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -161,7 +161,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -246,7 +246,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -301,7 +301,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } -- GitLab From 47e93d2f286eb062175ee1d89128887f0a2e8dc5 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:15 +0000 Subject: [PATCH 0414/2290] iommufd/iova_bitmap: Consider page offset for the pages to be pinned [ Upstream commit 4bbcbc6ea2fa379632a24c14cfb47aa603816ac6 ] For small bitmaps that aren't PAGE_SIZE aligned *and* that are less than 512 pages in bitmap length, use an extra page to be able to cover the entire range e.g. [1M..3G] which would be iterated more efficiently in a single iteration, rather than two. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index c748a1e3ba53a..dfab5b742191a 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -174,18 +174,19 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) bitmap->mapped_base_index) * sizeof(*bitmap->bitmap), PAGE_SIZE); - /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) -- GitLab From 7f31a244c753aacf40b71d01f03ca6742f81bbbc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 8 Feb 2024 17:36:28 -0500 Subject: [PATCH 0415/2290] RDMA/qedr: Fix qedr_create_user_qp error flow [ Upstream commit 5ba4e6d5863c53e937f49932dee0ecb004c65928 ] Avoid the following warning by making sure to free the allocated resources in case that qedr_init_user_queue() fail. -----------[ cut here ]----------- WARNING: CPU: 0 PID: 143192 at drivers/infiniband/core/rdma_core.c:874 uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Modules linked in: tls target_core_user uio target_core_pscsi target_core_file target_core_iblock ib_srpt ib_srp scsi_transport_srp nfsd nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs 8021q garp mrp stp llc ext4 mbcache jbd2 opa_vnic ib_umad ib_ipoib sunrpc rdma_ucm ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm hfi1 intel_rapl_msr intel_rapl_common mgag200 qedr sb_edac drm_shmem_helper rdmavt x86_pkg_temp_thermal drm_kms_helper intel_powerclamp ib_uverbs coretemp i2c_algo_bit kvm_intel dell_wmi_descriptor ipmi_ssif sparse_keymap kvm ib_core rfkill syscopyarea sysfillrect video sysimgblt irqbypass ipmi_si ipmi_devintf fb_sys_fops rapl iTCO_wdt mxm_wmi iTCO_vendor_support intel_cstate pcspkr dcdbas intel_uncore ipmi_msghandler lpc_ich acpi_power_meter mei_me mei fuse drm xfs libcrc32c qede sd_mod ahci libahci t10_pi sg crct10dif_pclmul crc32_pclmul crc32c_intel qed libata tg3 ghash_clmulni_intel megaraid_sas crc8 wmi [last unloaded: ib_srpt] CPU: 0 PID: 143192 Comm: fi_rdm_tagged_p Kdump: loaded Not tainted 5.14.0-408.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.14.0 01/25/2022 RIP: 0010:uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Code: 5d 41 5c 41 5d 41 5e e9 0f 26 1b dd 48 89 df e8 67 6a ff ff 49 8b 86 10 01 00 00 48 85 c0 74 9c 4c 89 e7 e8 83 c0 cb dd eb 92 <0f> 0b eb be 0f 0b be 04 00 00 00 48 89 df e8 8e f5 ff ff e9 6d ff RSP: 0018:ffffb7c6cadfbc60 EFLAGS: 00010286 RAX: ffff8f0889ee3f60 RBX: ffff8f088c1a5200 RCX: 00000000802a0016 RDX: 00000000802a0017 RSI: 0000000000000001 RDI: ffff8f0880042600 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8f11fffd5000 R11: 0000000000039000 R12: ffff8f0d5b36cd80 R13: ffff8f088c1a5250 R14: ffff8f1206d91000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8f11d7c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000147069200e20 CR3: 00000001c7210002 CR4: 00000000001706f0 Call Trace: ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? ib_uverbs_close+0x1f/0xb0 [ib_uverbs] ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? __warn+0x81/0x110 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? report_bug+0x10a/0x140 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ib_uverbs_close+0x1f/0xb0 [ib_uverbs] __fput+0x94/0x250 task_work_run+0x5c/0x90 do_exit+0x270/0x4a0 do_group_exit+0x2d/0x90 get_signal+0x87c/0x8c0 arch_do_signal_or_restart+0x25/0x100 ? ib_uverbs_ioctl+0xc2/0x110 [ib_uverbs] exit_to_user_mode_loop+0x9c/0x130 exit_to_user_mode_prepare+0xb6/0x100 syscall_exit_to_user_mode+0x12/0x40 do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? do_syscall_64+0x69/0x90 ? common_interrupt+0x43/0xa0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x1470abe3ec6b Code: Unable to access opcode bytes at RIP 0x1470abe3ec41. RSP: 002b:00007fff13ce9108 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: fffffffffffffffc RBX: 00007fff13ce9218 RCX: 00001470abe3ec6b RDX: 00007fff13ce9200 RSI: 00000000c0181b01 RDI: 0000000000000004 RBP: 00007fff13ce91e0 R08: 0000558d9655da10 R09: 0000558d9655dd00 R10: 00007fff13ce95c0 R11: 0000000000000246 R12: 00007fff13ce9358 R13: 0000000000000013 R14: 0000558d9655db50 R15: 00007fff13ce9470 --[ end trace 888a9b92e04c5c97 ]-- Fixes: df15856132bc ("RDMA/qedr: restructure functions that create/destroy QPs") Signed-off-by: Kamal Heib Link: https://lore.kernel.org/r/20240208223628.2040841-1-kheib@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d745ce9dc88aa..61755b5f3e20d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); -- GitLab From 5f69c475c147f24003081cc6d9a77eec97b5a64d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 19 Jan 2024 11:16:56 +0100 Subject: [PATCH 0416/2290] arm64: dts: rockchip: set num-cs property for spi on px30 [ Upstream commit 334bf0710c98d391f4067b72f535d6c4c84dfb6f ] The px30 has two spi controllers with two chip-selects each. The num-cs property is specified as the total number of chip selects a controllers has and is used since 2020 to find uses of chipselects outside that range in the Rockchip spi driver. Without the property set, the default is 1, so spi devices using the second chipselect will not be created. Fixes: eb1262e3cc8b ("spi: spi-rockchip: use num-cs property and ctlr->enable_gpiods") Signed-off-by: Heiko Stuebner Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240119101656.965744-1-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/px30.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index bfa3580429d10..61f0186447dad 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -607,6 +607,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -622,6 +623,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; -- GitLab From a9409d33af61c8c7f58540aaca8abf12c0c99589 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:07:13 +0100 Subject: [PATCH 0417/2290] RDMA/srpt: fix function pointer cast warnings [ Upstream commit eb5c7465c3240151cd42a55c7ace9da0026308a1 ] clang-16 notices that srpt_qp_event() gets called through an incompatible pointer here: drivers/infiniband/ulp/srpt/ib_srpt.c:1815:5: error: cast from 'void (*)(struct ib_event *, struct srpt_rdma_ch *)' to 'void (*)(struct ib_event *, void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1815 | = (void(*)(struct ib_event *, void*))srpt_qp_event; Change srpt_qp_event() to use the correct prototype and adjust the argument inside of it. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100728.458348-1-arnd@kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4607d37b9224a..cffa93f114a73 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -214,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1811,8 +1813,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; -- GitLab From 758b8f5e04988e95a9493fe7882075e73c76a82d Mon Sep 17 00:00:00 2001 From: Gianmarco Lusvardi Date: Tue, 13 Feb 2024 23:05:46 +0000 Subject: [PATCH 0418/2290] bpf, scripts: Correct GPL license name [ Upstream commit e37243b65d528a8a9f8b9a57a43885f8e8dfc15c ] The bpf_doc script refers to the GPL as the "GNU Privacy License". I strongly suspect that the author wanted to refer to the GNU General Public License, under which the Linux kernel is released, as, to the best of my knowledge, there is no license named "GNU Privacy License". This patch corrects the license name in the script accordingly. Fixes: 56a092c89505 ("bpf: add script and prepare bpf.h for new helpers documentation") Signed-off-by: Gianmarco Lusvardi Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240213230544.930018-3-glusvardi@posteo.net Signed-off-by: Sasha Levin --- scripts/bpf_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5c389df6045e..4de98b7bbea95 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -495,7 +495,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\ () system call, and this -- GitLab From 3c31b18a8dd8b7bf36af1cd723d455853b8f94fe Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 13 Feb 2024 10:22:00 -0600 Subject: [PATCH 0419/2290] scsi: smartpqi: Fix disable_managed_interrupts [ Upstream commit 5761eb9761d2d5fe8248a9b719efc4d8baf1f24a ] Correct blk-mq registration issue with module parameter disable_managed_interrupts enabled. When we turn off the default PCI_IRQ_AFFINITY flag, the driver needs to register with blk-mq using blk_mq_map_queues(). The driver is currently calling blk_mq_pci_map_queues() which results in a stack trace and possibly undefined behavior. Stack Trace: [ 7.860089] scsi host2: smartpqi [ 7.871934] WARNING: CPU: 0 PID: 238 at block/blk-mq-pci.c:52 blk_mq_pci_map_queues+0xca/0xd0 [ 7.889231] Modules linked in: sd_mod t10_pi sg uas smartpqi(+) crc32c_intel scsi_transport_sas usb_storage dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse [ 7.924755] CPU: 0 PID: 238 Comm: kworker/0:3 Not tainted 4.18.0-372.88.1.el8_6_smartpqi_test.x86_64 #1 [ 7.944336] Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 03/08/2022 [ 7.963026] Workqueue: events work_for_cpu_fn [ 7.978275] RIP: 0010:blk_mq_pci_map_queues+0xca/0xd0 [ 7.978278] Code: 48 89 de 89 c7 e8 f6 0f 4f 00 3b 05 c4 b7 8e 01 72 e1 5b 31 c0 5d 41 5c 41 5d 41 5e 41 5f e9 7d df 73 00 31 c0 e9 76 df 73 00 <0f> 0b eb bc 90 90 0f 1f 44 00 00 41 57 49 89 ff 41 56 41 55 41 54 [ 7.978280] RSP: 0018:ffffa95fc3707d50 EFLAGS: 00010216 [ 7.978283] RAX: 00000000ffffffff RBX: 0000000000000000 RCX: 0000000000000010 [ 7.978284] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffff9190c32d4310 [ 7.978286] RBP: 0000000000000000 R08: ffffa95fc3707d38 R09: ffff91929b81ac00 [ 7.978287] R10: 0000000000000001 R11: ffffa95fc3707ac0 R12: 0000000000000000 [ 7.978288] R13: ffff9190c32d4000 R14: 00000000ffffffff R15: ffff9190c4c950a8 [ 7.978290] FS: 0000000000000000(0000) GS:ffff9193efc00000(0000) knlGS:0000000000000000 [ 7.978292] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.172814] CR2: 000055d11166c000 CR3: 00000002dae10002 CR4: 00000000007706f0 [ 8.172816] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8.172817] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8.172818] PKRU: 55555554 [ 8.172819] Call Trace: [ 8.172823] blk_mq_alloc_tag_set+0x12e/0x310 [ 8.264339] scsi_add_host_with_dma.cold.9+0x30/0x245 [ 8.279302] pqi_ctrl_init+0xacf/0xc8e [smartpqi] [ 8.294085] ? pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.309015] pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.323286] local_pci_probe+0x42/0x80 [ 8.337855] work_for_cpu_fn+0x16/0x20 [ 8.351193] process_one_work+0x1a7/0x360 [ 8.364462] ? create_worker+0x1a0/0x1a0 [ 8.379252] worker_thread+0x1ce/0x390 [ 8.392623] ? create_worker+0x1a0/0x1a0 [ 8.406295] kthread+0x10a/0x120 [ 8.418428] ? set_kthread_struct+0x50/0x50 [ 8.431532] ret_from_fork+0x1f/0x40 [ 8.444137] ---[ end trace 1bf0173d39354506 ]--- Fixes: cf15c3e734e8 ("scsi: smartpqi: Add module param to disable managed ints") Tested-by: Yogesh Chandra Pandey Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mahesh Rajashekhara Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20240213162200.1875970-2-don.brace@microchip.com Reviewed-by: Tomas Henzl Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 47d487729635c..e44f6bb25a8ea 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6449,8 +6449,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) -- GitLab From 44148c1c82459e3227dca917f888c9bd3d73daae Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 13 Feb 2024 21:59:53 -0800 Subject: [PATCH 0420/2290] scsi: jazz_esp: Only build if SCSI core is builtin [ Upstream commit 9ddf190a7df77b77817f955fdb9c2ae9d1c9c9a3 ] JAZZ_ESP is a bool kconfig symbol that selects SCSI_SPI_ATTRS. When CONFIG_SCSI=m, this results in SCSI_SPI_ATTRS=m while JAZZ_ESP=y, which causes many undefined symbol linker errors. Fix this by only offering to build this driver when CONFIG_SCSI=y. [mkp: JAZZ_ESP is unique in that it does not support being compiled as a module unlike the remaining SPI SCSI HBA drivers] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20240214055953.9612-1-rdunlap@infradead.org Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nicolas Schier Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: linux-scsi@vger.kernel.org Cc: Geert Uytterhoeven Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402112222.Gl0udKyU-lkp@intel.com/ Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 03e71e3d5e5b3..3b990cf2c1954 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1285,7 +1285,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum -- GitLab From 2d5b4b3376fa146a23917b8577064906d643925f Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:03 +0100 Subject: [PATCH 0421/2290] net: bridge: switchdev: Skip MDB replays of deferred events on offload [ Upstream commit dc489f86257cab5056e747344f17a164f63bff4b ] Before this change, generation of the list of MDB events to replay would race against the creation of new group memberships, either from the IGMP/MLD snooping logic or from user configuration. While new memberships are immediately visible to walkers of br->mdb_list, the notification of their existence to switchdev event subscribers is deferred until a later point in time. So if a replay list was generated during a time that overlapped with such a window, it would also contain a replay of the not-yet-delivered event. The driver would thus receive two copies of what the bridge internally considered to be one single event. On destruction of the bridge, only a single membership deletion event was therefore sent. As a consequence of this, drivers which reference count memberships (at least DSA), would be left with orphan groups in their hardware database when the bridge was destroyed. This is only an issue when replaying additions. While deletion events may still be pending on the deferred queue, they will already have been removed from br->mdb_list, so no duplicates can be generated in that scenario. To a user this meant that old group memberships, from a bridge in which a port was previously attached, could be reanimated (in hardware) when the port joined a new bridge, without the new bridge's knowledge. For example, on an mv88e6xxx system, create a snooping bridge and immediately add a port to it: root@infix-06-0b-00:~$ ip link add dev br0 up type bridge mcast_snooping 1 && \ > ip link set dev x3 up master br0 And then destroy the bridge: root@infix-06-0b-00:~$ ip link del dev br0 root@infix-06-0b-00:~$ mvls atu ADDRESS FID STATE Q F 0 1 2 3 4 5 6 7 8 9 a DEV:0 Marvell 88E6393X 33:33:00:00:00:6a 1 static - - 0 . . . . . . . . . . 33:33:ff:87:e4:3f 1 static - - 0 . . . . . . . . . . ff:ff:ff:ff:ff:ff 1 static - - 0 1 2 3 4 5 6 7 8 9 a root@infix-06-0b-00:~$ The two IPv6 groups remain in the hardware database because the port (x3) is notified of the host's membership twice: once via the original event and once via a replay. Since only a single delete notification is sent, the count remains at 1 when the bridge is destroyed. Then add the same port (or another port belonging to the same hardware domain) to a new bridge, this time with snooping disabled: root@infix-06-0b-00:~$ ip link add dev br1 up type bridge mcast_snooping 0 && \ > ip link set dev x3 up master br1 All multicast, including the two IPv6 groups from br0, should now be flooded, according to the policy of br1. But instead the old memberships are still active in the hardware database, causing the switch to only forward traffic to those groups towards the CPU (port 0). Eliminate the race in two steps: 1. Grab the write-side lock of the MDB while generating the replay list. This prevents new memberships from showing up while we are generating the replay list. But it leaves the scenario in which a deferred event was already generated, but not delivered, before we grabbed the lock. Therefore: 2. Make sure that no deferred version of a replay event is already enqueued to the switchdev deferred queue, before adding it to the replay list, when replaying additions. Fixes: 4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined mdb entries") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/switchdev.h | 3 ++ net/bridge/br_switchdev.c | 74 ++++++++++++++++++++++++--------------- net/switchdev/switchdev.c | 73 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 28 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7dcdc97c0bc33..a3d8f013adcd5 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -303,6 +303,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4b3982c368b35..65567d1c8b853 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -593,21 +593,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) - return -ENOMEM; + br_switchdev_mdb_populate(&mdb, mp); - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } + + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) + return -ENOMEM; + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -675,51 +694,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. */ - rcu_read_lock(); + spin_lock_bh(&br->multicast_lock); - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8cc42aea19c7e..2e14d4c37e2dc 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include #include +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); -- GitLab From 91ac2c79e896b28a4a3a262384689ee6dfeaf083 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:04 +0100 Subject: [PATCH 0422/2290] net: bridge: switchdev: Ensure deferred event delivery on unoffload [ Upstream commit f7a70d650b0b6b0134ccba763d672c8439d9f09b ] When unoffloading a device, it is important to ensure that all relevant deferred events are delivered to it before it disassociates itself from the bridge. Before this change, this was true for the normal case when a device maps 1:1 to a net_bridge_port, i.e. br0 / swp0 When swp0 leaves br0, the call to switchdev_deferred_process() in del_nbp() makes sure to process any outstanding events while the device is still associated with the bridge. In the case when the association is indirect though, i.e. when the device is attached to the bridge via an intermediate device, like a LAG... br0 / lag0 / swp0 ...then detaching swp0 from lag0 does not cause any net_bridge_port to be deleted, so there was no guarantee that all events had been processed before the device disassociated itself from the bridge. Fix this by always synchronously processing all deferred events before signaling completion of unoffloading back to the driver. Fixes: 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_switchdev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 65567d1c8b853..b61ef2dff7a4b 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -798,6 +798,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a -- GitLab From 729bc77af438a6e67914c97f6f3d3af8f72c0131 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Feb 2024 11:13:08 -0800 Subject: [PATCH 0423/2290] dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished(). [ Upstream commit 66b60b0c8c4a163b022a9f0ad6769b0fd3dc662f ] syzkaller reported a warning [0] in inet_csk_destroy_sock() with no repro. WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); However, the syzkaller's log hinted that connect() failed just before the warning due to FAULT_INJECTION. [1] When connect() is called for an unbound socket, we search for an available ephemeral port. If a bhash bucket exists for the port, we call __inet_check_established() or __inet6_check_established() to check if the bucket is reusable. If reusable, we add the socket into ehash and set inet_sk(sk)->inet_num. Later, we look up the corresponding bhash2 bucket and try to allocate it if it does not exist. Although it rarely occurs in real use, if the allocation fails, we must revert the changes by check_established(). Otherwise, an unconnected socket could illegally occupy an ehash entry. Note that we do not put tw back into ehash because sk might have already responded to a packet for tw and it would be better to free tw earlier under such memory presure. [0]: WARNING: CPU: 0 PID: 350830 at net/ipv4/inet_connection_sock.c:1193 inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Code: 41 5c 41 5d 41 5e e9 2d 4a 3d fd e8 28 4a 3d fd 48 89 ef e8 f0 cd 7d ff 5b 5d 41 5c 41 5d 41 5e e9 13 4a 3d fd e8 0e 4a 3d fd <0f> 0b e9 61 fe ff ff e8 02 4a 3d fd 4c 89 e7 be 03 00 00 00 e8 05 RSP: 0018:ffffc9000b21fd38 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000009e78 RCX: ffffffff840bae40 RDX: ffff88806e46c600 RSI: ffffffff840bb012 RDI: ffff88811755cca8 RBP: ffff88811755c880 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000009e78 R11: 0000000000000000 R12: ffff88811755c8e0 R13: ffff88811755c892 R14: ffff88811755c918 R15: 0000000000000000 FS: 00007f03e5243800(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32f21000 CR3: 0000000112ffe001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: ? inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) dccp_close (net/dccp/proto.c:1078) inet_release (net/ipv4/af_inet.c:434) __sock_release (net/socket.c:660) sock_close (net/socket.c:1423) __fput (fs/file_table.c:377) __fput_sync (fs/file_table.c:462) __x64_sys_close (fs/open.c:1557 fs/open.c:1539 fs/open.c:1539) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e53852bb Code: 03 00 00 00 0f 05 48 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 43 c9 f5 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 c9 f5 ff 8b 44 RSP: 002b:00000000005dfba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f03e53852bb RDX: 0000000000000002 RSI: 0000000000000002 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000167c R10: 0000000008a79680 R11: 0000000000000293 R12: 00007f03e4e43000 R13: 00007f03e4e43170 R14: 00007f03e4e43178 R15: 00007f03e4e43170 [1]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 350833 Comm: syz-executor.1 Not tainted 6.7.0-12272-g2121c43f88f5 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3748) kmem_cache_alloc (mm/slub.c:3763 mm/slub.c:3842 mm/slub.c:3867) inet_bind2_bucket_create (net/ipv4/inet_hashtables.c:135) __inet_hash_connect (net/ipv4/inet_hashtables.c:1100) dccp_v4_connect (net/dccp/ipv4.c:116) __inet_stream_connect (net/ipv4/af_inet.c:676) inet_stream_connect (net/ipv4/af_inet.c:747) __sys_connect_file (net/socket.c:2048 (discriminator 2)) __sys_connect (net/socket.c:2065) __x64_sys_connect (net/socket.c:2072) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e5284e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f03e4641cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f03e5284e5d RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f03e52e5530 R15: 0000000000000000 Reported-by: syzkaller Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f2ed2aed08ab3..56776e1b1de52 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1111,10 +1111,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } -- GitLab From b06a3b1cbdfbfec85af1efda3c55cc9d8a492b08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:57:37 +0100 Subject: [PATCH 0424/2290] nouveau: fix function cast warnings [ Upstream commit 0affdba22aca5573f9d989bcb1d71d32a6a03efe ] clang-16 warns about casting between incompatible function types: drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c:161:10: error: cast from 'void (*)(const struct firmware *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 161 | .fini = (void(*)(void *))release_firmware, This one was done to use the generic shadow_fw_release() function as a callback for struct nvbios_source. Change it to use the same prototype as the other five instances, with a trivial helper function that actually calls release_firmware. Fixes: 70c0f263cc2e ("drm/nouveau/bios: pull in basic vbios subdev, more to come later") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240213095753.455062-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fca..8c2bf1c16f2a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; -- GitLab From ca4a1c00beffe9c1f090ea0a5912e00975146578 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:50 -0800 Subject: [PATCH 0425/2290] x86/numa: Fix the address overlap check in numa_fill_memblks() [ Upstream commit 9b99c17f7510bed2adbe17751fb8abddba5620bc ] numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. To do so, it first creates a list of existing memblks that overlap that address range. The issue is that it is off by one when comparing to the end of the address range, so memblks that do not overlap are selected. The impact of selecting a memblk that does not actually overlap is that an existing memblk may be filled when the expected action is to do nothing and return NUMA_NO_MEMBLK to the caller. The caller can then add a new NUMA node and memblk. Replace the broken open-coded search for address overlap with the memblock helper memblock_addrs_overlap(). Update the kernel doc and in code comments. Suggested by: "Huang, Ying" Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Mike Rapoport (IBM) Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/10a3e6109c34c21a8dd4c513cf63df63481a2b07.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- arch/x86/mm/numa.c | 19 +++++++------------ include/linux/memblock.h | 2 ++ mm/memblock.c | 5 +++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index aa39d678fe81d..e60c61b8bbc61 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -971,14 +971,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -990,17 +988,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 50ad19662a322..6790f08066b72 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783dcf1d..516efec80851a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -175,8 +175,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -- GitLab From 25bd33b87f8a337c7dbbb2887c82120ef99851e7 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:51 -0800 Subject: [PATCH 0426/2290] x86/numa: Fix the sort compare func used in numa_fill_memblks() [ Upstream commit b626070ffc14acca5b87a2aa5f581db98617584c ] The compare function used to sort memblks into starting address order fails when the result of its u64 address subtraction gets truncated to an int upon return. The impact of the bad sort is that memblks will be filled out incorrectly. Depending on the set of memblks, a user may see no errors at all but still have a bad fill, or see messages reporting a node overlap that leads to numa init failure: [] node 0 [mem: ] overlaps with node 1 [mem: ] [] No NUMA configuration found Replace with a comparison that can only result in: 1, 0, -1. Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/99dcb3ae87e04995e9f293f6158dc8fa0749a487.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e60c61b8bbc61..dae5c952735c7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -961,7 +961,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; -- GitLab From 8e29f988ad32e87fbfe263d7d22d24c6a9eaba05 Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 14 Feb 2024 12:27:17 +0300 Subject: [PATCH 0427/2290] net: stmmac: Fix incorrect dereference in interrupt handlers [ Upstream commit 97dde84026339e4b4af9a6301f825d1828d7874b ] If 'dev' or 'data' is NULL, the 'priv' variable has an incorrect address when dereferencing calling netdev_err(). Since we get as 'dev_id' or 'data' what was passed as the 'dev' argument to request_irq() during interrupt initialization (that is, the net_device and rx/tx queue pointers initialized at the time of the call) and since there are usually no checks for the 'dev_id' argument in such handlers in other drivers, remove these checks from the handlers in stmmac driver. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX") Signed-off-by: Pavel Sakharov Reviewed-by: Serge Semin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 66178ce6d000e..91b2aa81914ba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5823,11 +5823,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5843,11 +5838,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5869,11 +5859,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5900,11 +5885,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; -- GitLab From b43a4fb42fefa69b00a4bc697c59b8107d9293cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:06 +0000 Subject: [PATCH 0428/2290] ipv4: properly combine dev_base_seq and ipv4.dev_addr_genid [ Upstream commit 081a0e3b0d4c061419d3f4679dec9f68725b17e4 ] net->dev_base_seq and ipv4.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 0465277f6b3f ("ipv4: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/devinet.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 35d6e74be8406..bb0d1252cad86 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1804,6 +1804,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1855,8 +1870,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2257,8 +2271,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; -- GitLab From e5703735e57a3f1171b9b708f58850af55ef9602 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:07 +0000 Subject: [PATCH 0429/2290] ipv6: properly combine dev_base_seq and ipv6.dev_addr_genid [ Upstream commit e898e4cd1aab271ca414f9ac6e08e4c761f6913c ] net->dev_base_seq and ipv6.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 63998ac24f83 ("ipv6: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Signed-off-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b8dc20fe7a4e2..46527b5cc8f0c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -706,6 +706,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +755,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5326,7 +5341,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; -- GitLab From 9a581b17b7227bfba3f109f0452d61582b3b662e Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 16 Feb 2024 23:44:57 +0530 Subject: [PATCH 0430/2290] ata: ahci_ceva: fix error handling for Xilinx GT PHY support [ Upstream commit 26c8404e162b43dddcb037ba2d0cb58c0ed60aab ] Platform clock and phy error resources are not cleaned up in Xilinx GT PHY error path. To fix introduce the function ceva_ahci_platform_enable_resources() which is a customized version of ahci_platform_enable_resources() and inline with SATA IP programming sequence it does: - Assert SATA reset - Program PS GTR phy - Bring SATA by de-asserting the reset - Wait for GT lane PLL to be locked ceva_ahci_platform_enable_resources() is also used in the resume path as the same SATA programming sequence (as in probe) should be followed. Also cleanup the mixed usage of ahci_platform_enable_resources() and custom implementation in the probe function as both are not required. Fixes: 9a9d3abe24bb ("ata: ahci: ceva: Update the driver to support xilinx GT phy") Signed-off-by: Radhey Shyam Pandey Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci_ceva.c | 125 +++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index cb24ecf36fafe..50e07ea60e45c 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; -- GitLab From addf5e297e6cbf5341f9c07720693ca9ba0057b5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Feb 2024 13:12:17 -0800 Subject: [PATCH 0431/2290] bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel [ Upstream commit 0281b919e175bb9c3128bd3872ac2903e9436e3f ] The following race is possible between bpf_timer_cancel_and_free and bpf_timer_cancel. It will lead a UAF on the timer->timer. bpf_timer_cancel(); spin_lock(); t = timer->time; spin_unlock(); bpf_timer_cancel_and_free(); spin_lock(); t = timer->timer; timer->timer = NULL; spin_unlock(); hrtimer_cancel(&t->timer); kfree(t); /* UAF on t */ hrtimer_cancel(&t->timer); In bpf_timer_cancel_and_free, this patch frees the timer->timer after a rcu grace period. This requires a rcu_head addition to the "struct bpf_hrtimer". Another kfree(t) happens in bpf_timer_init, this does not need a kfree_rcu because it is still under the spin_lock and timer->timer has not been visible by others yet. In bpf_timer_cancel, rcu_read_lock() is added because this helper can be used in a non rcu critical section context (e.g. from a sleepable bpf prog). Other timer->timer usages in helpers.c have been audited, bpf_timer_cancel() is the only place where timer->timer is used outside of the spin_lock. Another solution considered is to mark a t->flag in bpf_timer_cancel and clear it after hrtimer_cancel() is done. In bpf_timer_cancel_and_free, it busy waits for the flag to be cleared before kfree(t). This patch goes with a straight forward solution and frees timer->timer after a rcu grace period. Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Suggested-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240215211218.990808-1-martin.lau@linux.dev Signed-off-by: Sasha Levin --- kernel/bpf/helpers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6a61a98d602cd..83f8f67e933df 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1091,6 +1091,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1312,6 +1313,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1333,6 +1335,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1387,7 +1390,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) -- GitLab From e8530b170e464017203e3b8c6c49af6e916aece1 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 19 Feb 2024 14:39:03 +0000 Subject: [PATCH 0432/2290] afs: Increase buffer size in afs_update_volume_status() [ Upstream commit 6ea38e2aeb72349cad50e38899b0ba6fbcb2af3d ] The max length of volume->vid value is 20 characters. So increase idbuf[] size up to 24 to avoid overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. [DH: Actually, it's 20 + NUL, so increase it to 24 and use snprintf()] Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Daniil Dulov Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240211150442.3416-1-d.dulov@aladdin.ru/ # v1 Link: https://lore.kernel.org/r/20240212083347.10742-1-d.dulov@aladdin.ru/ # v2 Link: https://lore.kernel.org/r/20240219143906.138346-3-dhowells@redhat.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/volume.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 1c9144e3e83ac..a146d70efa650 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -341,7 +341,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -349,7 +349,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { -- GitLab From 8391b9b651cfdf80ab0f1dc4a489f9d67386e197 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:27:17 +0300 Subject: [PATCH 0433/2290] ipv6: sr: fix possible use-after-free and null-ptr-deref [ Upstream commit 5559cea2d5aa3018a5f00dd2aca3427ba09b386b ] The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215202717.29815-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9ff..35508abd76f43 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } -- GitLab From fd84a5fae03c0a6ca66d6d7eb9f473b2c7957c21 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 02:56:03 -0700 Subject: [PATCH 0434/2290] net: dev: Convert sa_data to flexible array in struct sockaddr [ Upstream commit b5f0de6df6dce8d641ef58ef7012f3304dffb9a1 ] One of the worst offenders of "fake flexible arrays" is struct sockaddr, as it is the classic example of why GCC and Clang have been traditionally forced to treat all trailing arrays as fake flexible arrays: in the distant misty past, sa_data became too small, and code started just treating it as a flexible array, even though it was fixed-size. The special case by the compiler is specifically that sizeof(sa->sa_data) and FORTIFY_SOURCE (which uses __builtin_object_size(sa->sa_data, 1)) do not agree (14 and -1 respectively), which makes FORTIFY_SOURCE treat it as a flexible array. However, the coming -fstrict-flex-arrays compiler flag will remove these special cases so that FORTIFY_SOURCE can gain coverage over all the trailing arrays in the kernel that are _not_ supposed to be treated as a flexible array. To deal with this change, convert sa_data to a true flexible array. To keep the structure size the same, move sa_data into a union with a newly introduced sa_data_min with the original size. The result is that FORTIFY_SOURCE can continue to have no idea how large sa_data may actually be, but anything using sizeof(sa->sa_data) must switch to sizeof(sa->sa_data_min). Cc: Jens Axboe Cc: Pavel Begunkov Cc: David Ahern Cc: Dylan Yudaken Cc: Yajun Deng Cc: Petr Machata Cc: Hangbin Liu Cc: Leon Romanovsky Cc: syzbot Cc: Willem de Bruijn Cc: Pablo Neira Ayuso Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221018095503.never.671-kees@kernel.org Signed-off-by: Jakub Kicinski Stable-dep-of: a7d6027790ac ("arp: Prevent overflow in arp_req_get().") Signed-off-by: Sasha Levin --- include/linux/socket.h | 5 ++++- net/core/dev.c | 2 +- net/core/dev_ioctl.c | 2 +- net/packet/af_packet.c | 10 +++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index b3c58042bd254..d79efd0268809 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/net/core/dev.c b/net/core/dev.c index 1ba3662faf0aa..60619fe8af5fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8861,7 +8861,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7674bb9f3076c..5cdbfbf9a7dcf 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51882f07ef70c..c3117350f5fbb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3284,7 +3284,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3295,8 +3295,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3566,11 +3566,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); -- GitLab From 38c83c2488dc3726aad855c05ce91d28e968b9f3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 23:06:32 +0000 Subject: [PATCH 0435/2290] arm64/sme: Restore SME registers on exit from suspend [ Upstream commit 9533864816fb4a6207c63b7a98396351ce1a9fae ] The fields in SMCR_EL1 and SMPRI_EL1 reset to an architecturally UNKNOWN value. Since we do not otherwise manage the traps configured in this register at runtime we need to reconfigure them after a suspend in case nothing else was kind enough to preserve them for us. The vector length will be restored as part of restoring the SME state for the next SME using task. Fixes: a1f4ccd25cc2 ("arm64/sme: Provide Kconfig for SME") Reported-by: Jackson Cooper-Driver Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240213-arm64-sme-resume-v3-1-17e05e493471@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/fpsimd.c | 14 ++++++++++++++ arch/arm64/kernel/suspend.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index d720b6f7e5f9c..da18413712c04 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -343,6 +343,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -372,6 +373,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t za_state_size(struct task_struct const *task) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8c226d79abdfc..59b5a16bab5d6 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1347,6 +1347,20 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 8b02d310838f9..064d996cc55b2 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } -- GitLab From 6216509a2e11d6600aa24b92ba2735f820be4eae Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 20:23:11 -0600 Subject: [PATCH 0436/2290] platform/x86: thinkpad_acpi: Only update profile if successfully converted [ Upstream commit 427c70dec738318b7f71e1b9d829ff0e9771d493 ] Randomly a Lenovo Z13 will trigger a kernel warning traceback from this condition: ``` if (WARN_ON((profile < 0) || (profile >= ARRAY_SIZE(profile_names)))) ``` This happens because thinkpad-acpi always assumes that convert_dytc_to_profile() successfully updated the profile. On the contrary a condition can occur that when dytc_profile_refresh() is called the profile doesn't get updated as there is a -EOPNOTSUPP branch. Catch this situation and avoid updating the profile. Also log this into dynamic debugging in case any other modes should be added in the future. Fixes: c3bfcd4c6762 ("platform/x86: thinkpad_acpi: Add platform profile support") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217022311.113879-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 6edd2e294750e..c2fb19af10705 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10511,6 +10511,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10696,8 +10697,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } -- GitLab From 18580e48e624f04bcd1ed854ccc2d8f2e2225e02 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Feb 2024 18:25:14 +0530 Subject: [PATCH 0437/2290] octeontx2-af: Consider the action set by PF [ Upstream commit 3b1ae9b71c2a97f848b00fb085a2bd29bddbe8d9 ] AF reserves MCAM entries for each PF, VF present in the system and populates the entry with DMAC and action with default RSS so that basic packet I/O works. Since PF/VF is not aware of the RSS action installed by AF, AF only fixup the actions of the rules installed by PF/VF with corresponding default RSS action. This worked well for rules installed by PF/VF for features like RX VLAN offload and DMAC filters but rules involving action like drop/forward to queue are also getting modified by AF. Hence fix it by setting the default RSS action only if requested by PF/VF. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3784347b6fd88..55639c133dd02 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -437,6 +437,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); -- GitLab From 11277d18926717a3c00ef745714b16f82b8f8504 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 16 Feb 2024 20:48:14 -0400 Subject: [PATCH 0438/2290] s390: use the correct count for __iowrite64_copy() [ Upstream commit 723a2cc8d69d4342b47dfddbfe6c19f1b135f09b ] The signature for __iowrite64_copy() requires the number of 64 bit quantities, not bytes. Multiple by 8 to get to a byte length before invoking zpci_memcpy_toio() Fixes: 87bc359b9822 ("s390/pci: speed up __iowrite64_copy by using pci store block insn") Acked-by: Niklas Schnelle Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-9223d11a7662+1d7785-s390_iowrite64_jgg@nvidia.com Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2c99f9552b2f5..394c69fda399e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -241,7 +241,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) -- GitLab From 4588b13abcbd561ec67f5b3c1cb2eff690990a54 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 19 Feb 2024 00:09:33 +0900 Subject: [PATCH 0439/2290] bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready() [ Upstream commit 4cd12c6065dfcdeba10f49949bffcf383b3952d8 ] syzbot reported the following NULL pointer dereference issue [1]: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] RIP: 0010:0x0 [...] Call Trace: sk_psock_verdict_data_ready+0x232/0x340 net/core/skmsg.c:1230 unix_stream_sendmsg+0x9b4/0x1230 net/unix/af_unix.c:2293 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 If sk_psock_verdict_data_ready() and sk_psock_stop_verdict() are called concurrently, psock->saved_data_ready can be NULL, causing the above issue. This patch fixes this issue by calling the appropriate data ready function using the sk_psock_data_ready() helper and protecting it from concurrency with sk->sk_callback_lock. Fixes: 6df7f764cd3c ("bpf, sockmap: Wake up polling after data copy") Reported-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Daniel Borkmann Tested-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Acked-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=fd7b34375c1c8ce29c93 [1] Link: https://lore.kernel.org/bpf/20240218150933.6004-1-syoshida@redhat.com Signed-off-by: Sasha Levin --- net/core/skmsg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3818035ea0021..39643f78cf782 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1217,8 +1217,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } -- GitLab From ca89b4f5034d5c775956cd3830e9e7930d7053ff Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:29 +0100 Subject: [PATCH 0440/2290] tls: break out of main loop when PEEK gets a non-data record [ Upstream commit 10f41d0710fc81b7af93fa6106678d57b1ff24a7 ] PEEK needs to leave decrypted records on the rx_list so that we can receive them later on, so it jumps back into the async code that queues the skb. Unfortunately that makes us skip the TLS_RECORD_TYPE_DATA check at the bottom of the main loop, so if two records of the same (non-DATA) type are queued, we end up merging them. Add the same record type check, and make it unlikely to not penalize the async fastpath. Async decrypt only applies to data record, so this check is only needed for PEEK. process_rx_list also has similar issues. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/3df2eef4fdae720c55e69472b5bea668772b45a2.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c8cbdd02a784e..cd86c271c1348 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2137,6 +2137,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } -- GitLab From 6756168add1c6c3ef1c32c335bb843a5d1f99a75 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:30 +0100 Subject: [PATCH 0441/2290] tls: stop recv() if initial process_rx_list gave us non-DATA [ Upstream commit fdfbaec5923d9359698cbb286bc0deadbb717504 ] If we have a non-DATA record on the rx_list and another record of the same type still on the queue, we will end up merging them: - process_rx_list copies the non-DATA record - we start the loop and process the first available record since it's of the same type - we break out of the loop since the record was not DATA Just check the record type and jump to the end in case process_rx_list did some work. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/bd31449e43bd4b6ff546f5c51cf958c31c511deb.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index cd86c271c1348..20d2877bf22ad 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2044,7 +2044,7 @@ int tls_sw_recvmsg(struct sock *sk, goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); -- GitLab From bdaf6bbfc1f231bacc22d222d830b9dc817d7d23 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:31 +0100 Subject: [PATCH 0442/2290] tls: don't skip over different type records from the rx_list [ Upstream commit ec823bf3a479d42c589dc0f28ef4951c49cd2d2a ] If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA and do a recv(PEEK), the rx_list will contain the first two records. The next large recv will walk through the rx_list and copy data from record 1, then stop because record 2 is a different type. Since we haven't filled up our buffer, we will process the next available record. It's also DATA, so we can merge it with the current read. We shouldn't do that, since there was a record in between that we ignored. Add a flag to let process_rx_list inform tls_sw_recvmsg that it had more data available. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/f00c0c0afa080c60f016df1471158c1caf983c34.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 20d2877bf22ad..93e1bfa72d791 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1845,7 +1845,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1858,7 +1859,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1876,12 +1877,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1917,6 +1918,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -2020,6 +2025,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -2039,12 +2045,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2203,10 +2209,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; -- GitLab From 0c9302a6da262e6ab6a6c1d30f04a6130ed97376 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2024 16:58:04 +0100 Subject: [PATCH 0443/2290] netfilter: nf_tables: set dormant flag on hook register failure [ Upstream commit bccebf64701735533c8db37773eeacc6566cc8ec ] We need to set the dormant flag again if we fail to register the hooks. During memory pressure hook registration can fail and we end up with a table marked as active but no registered hooks. On table/base chain deletion, nf_tables will attempt to unregister the hook again which yields a warn splat from the nftables core. Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 49acb89ba9c56..f1a74b0949999 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1245,6 +1245,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } -- GitLab From 9c5662e95a8dcc232c3ef4deb21033badcd260f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 May 2023 07:35:33 +0200 Subject: [PATCH 0444/2290] netfilter: flowtable: simplify route logic [ Upstream commit fa502c86566680ac62bc28ec883a069bf7a2aa5e ] Grab reference to dst from skbuff earlier to simplify route caching. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Stable-dep-of: 9e0f0430389b ("netfilter: nft_flow_offload: reset dst in route object after setting up flow") Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 4 ++-- net/netfilter/nf_flow_table_core.c | 24 +++--------------------- net/netfilter/nft_flow_offload.c | 12 ++++++++---- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index dde4dd9c4012c..692d5955911c7 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -274,8 +274,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, flow_table->type->put(flow_table); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); +void flow_offload_route_init(struct flow_offload *flow, + const struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index c1d99cb370b44..78e4aba52b22a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow, break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: - if (!dst_hold_safe(route->tuple[dir].dst)) - return -1; - flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; @@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow, dst_release(flow->tuplehash[dir].tuple.dst_cache); } -int flow_offload_route_init(struct flow_offload *flow, +void flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route) { - int err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); - if (err < 0) - return err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); - if (err < 0) - goto err_route_reply; - + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; - - return 0; - -err_route_reply: - nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); - - return err; } EXPORT_SYMBOL_GPL(flow_offload_route_init); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 3d9f6dda5aeb2..7a8707632a815 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; } + if (!dst_hold_safe(this_dst)) + return -ENOENT; + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); - if (!other_dst) + if (!other_dst) { + dst_release(this_dst); return -ENOENT; + } nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, other_dst, !dir); @@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (flow_offload_route_init(flow, &route) < 0) - goto err_flow_add; + flow_offload_route_init(flow, &route); if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; @@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; - dst_release(route.tuple[!dir].dst); return; err_flow_add: flow_offload_free(flow); err_flow_alloc: + dst_release(route.tuple[dir].dst); dst_release(route.tuple[!dir].dst); err_flow_route: clear_bit(IPS_OFFLOAD_BIT, &ct->status); -- GitLab From 012df10717da02367aaf92c65f9c89db206c15f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Feb 2024 12:32:58 +0100 Subject: [PATCH 0445/2290] netfilter: nft_flow_offload: reset dst in route object after setting up flow [ Upstream commit 9e0f0430389be7696396c62f037be4bf72cf93e3 ] dst is transferred to the flow object, route object does not own it anymore. Reset dst in route object, otherwise if flow_offload_add() fails, error path releases dst twice, leading to a refcount underflow. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 692d5955911c7..4a767b3d20b9d 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -275,7 +275,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 78e4aba52b22a..2036c7a27075b 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -146,7 +156,7 @@ static void nft_flow_dst_release(struct flow_offload *flow, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) + struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); -- GitLab From a6cafdb49a7bbf4a88367db209703eee6941e023 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Feb 2024 21:36:39 +0100 Subject: [PATCH 0446/2290] netfilter: nft_flow_offload: release dst in case direct xmit path is used [ Upstream commit 8762785f459be1cfe6fcf7285c123aad6a3703f0 ] Direct xmit does not use it since it calls dev_queue_xmit() to send packets, hence it calls dst_release(). kmemleak reports: unreferenced object 0xffff88814f440900 (size 184): comm "softirq", pid 0, jiffies 4294951896 hex dump (first 32 bytes): 00 60 5b 04 81 88 ff ff 00 e6 e8 82 ff ff ff ff .`[............. 21 0b 50 82 ff ff ff ff 00 00 00 00 00 00 00 00 !.P............. backtrace (crc cb2bf5d6): [<000000003ee17107>] kmem_cache_alloc+0x286/0x340 [<0000000021a5de2c>] dst_alloc+0x43/0xb0 [<00000000f0671159>] rt_dst_alloc+0x2e/0x190 [<00000000fe5092c9>] __mkroute_output+0x244/0x980 [<000000005fb96fb0>] ip_route_output_flow+0xc0/0x160 [<0000000045367433>] nf_ip_route+0xf/0x30 [<0000000085da1d8e>] nf_route+0x2d/0x60 [<00000000d1ecd1cb>] nft_flow_route+0x171/0x6a0 [nft_flow_offload] [<00000000d9b2fb60>] nft_flow_offload_eval+0x4e8/0x700 [nft_flow_offload] [<000000009f447dbb>] expr_call_ops_eval+0x53/0x330 [nf_tables] [<00000000072e1be6>] nft_do_chain+0x17c/0x840 [nf_tables] [<00000000d0551029>] nft_do_chain_inet+0xa1/0x210 [nf_tables] [<0000000097c9d5c6>] nf_hook_slow+0x5b/0x160 [<0000000005eccab1>] ip_forward+0x8b6/0x9b0 [<00000000553a269b>] ip_rcv+0x221/0x230 [<00000000412872e5>] __netif_receive_skb_one_core+0xfe/0x110 Fixes: fa502c865666 ("netfilter: flowtable: simplify route logic") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 2036c7a27075b..99195cf6b2657 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: -- GitLab From 26994a04b0ba50388600a617afe7baaa239cad25 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 21 Apr 2023 00:34:30 +0200 Subject: [PATCH 0447/2290] netfilter: nf_tables: rename function to destroy hook list [ Upstream commit cdc32546632354305afdcf399a5431138a31c9e0 ] Rename nft_flowtable_hooks_destroy() by nft_hooks_destroy() to prepare for netdev chain device updates. Signed-off-by: Pablo Neira Ayuso Stable-dep-of: d472e9853d7b ("netfilter: nf_tables: register hooks last when adding new chain/flowtable") Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f1a74b0949999..c7b543d1a0516 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7938,7 +7938,7 @@ err_unregister_net_hooks: return err; } -static void nft_flowtable_hooks_destroy(struct list_head *hook_list) +static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; @@ -8123,7 +8123,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, &flowtable->hook_list, flowtable); if (err < 0) { - nft_flowtable_hooks_destroy(&flowtable->hook_list); + nft_hooks_destroy(&flowtable->hook_list); goto err4; } @@ -8893,7 +8893,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; @@ -9850,7 +9850,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; -- GitLab From f305359186724ac4bc058d5cd01782e6e6f9a3e7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Feb 2024 19:43:53 +0100 Subject: [PATCH 0448/2290] netfilter: nf_tables: register hooks last when adding new chain/flowtable [ Upstream commit d472e9853d7b46a6b094224d131d09ccd3a03daf ] Register hooks last when adding chain/flowtable to ensure that packets do not walk over datastructure that is being released in the error path without waiting for the rcu grace period. Fixes: 91c7b38dc9f0 ("netfilter: nf_tables: use new transaction infrastructure to handle chain") Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 78 ++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c7b543d1a0516..a29313e0aaa4d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -686,15 +686,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -703,22 +704,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -2459,19 +2460,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2479,17 +2476,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -8031,9 +8033,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8113,34 +8115,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], &flowtable_hook, flowtable, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); -- GitLab From ea33b816691255d1d5eeb7f5a02f2acf6a556393 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Feb 2024 18:38:45 +0100 Subject: [PATCH 0449/2290] netfilter: nf_tables: use kzalloc for hook allocation [ Upstream commit 195e5f88c2e48330ba5483e0bad2de3b3fad484f ] KMSAN reports unitialized variable when registering the hook, reg->hook_ops_type == NF_HOOK_OP_BPF) ~~~~~~~~~~~ undefined This is a small structure, just use kzalloc to make sure this won't happen again when new fields get added to nf_hook_ops. Fixes: 7b4b2fa37587 ("netfilter: annotate nf_tables base hook ops") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a29313e0aaa4d..e21ec3ad80939 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2059,7 +2059,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; -- GitLab From c22ad76cfc43a2c7923de1a9acc0d05b9fa2c63c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 15 Feb 2024 15:53:08 +0800 Subject: [PATCH 0450/2290] net: mctp: put sock on tag allocation failure [ Upstream commit 9990889be14288d4f1743e4768222d5032a79c27 ] We may hold an extra reference on a socket if a tag allocation fails: we optimistically allocate the sk_key, and take a ref there, but do not drop if we end up not using the allocated key. Ensure we're dropping the sock on this failure by doing a proper unref rather than directly kfree()ing. Fixes: de8a6b15d965 ("net: mctp: add an explicit reference from a mctp_sk_key to sock") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ce9b61e44d1cdae7797be0c5e3141baf582d23a0.1707983487.git.jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mctp/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 68be8f2b622dd..256bf0b89e6ca 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } -- GitLab From 1623161f80a4b8bb284b0cc3118daf3ccd5bf923 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 19 Feb 2024 09:00:43 +0100 Subject: [PATCH 0451/2290] net: sparx5: Add spinlock for frame transmission from CPU [ Upstream commit 603ead96582d85903baec2d55f021b8dac5c25d2 ] Both registers used when doing manual injection or fdma injection are shared between all the net devices of the switch. It was noticed that when having two process which each of them trying to inject frames on different ethernet ports, that the HW started to behave strange, by sending out more frames then expected. When doing fdma injection it is required to set the frame in the DCB and then make sure that the next pointer of the last DCB is invalid. But because there is no locks for this, then easily this pointer between the DCB can be broken and then it would create a loop of DCBs. And that means that the HW will continuously transmit these frames in a loop. Until the SW will break this loop. Therefore to fix this issue, add a spin lock for when accessing the registers for manual or fdma injection. Signed-off-by: Horatiu Vultur Reviewed-by: Daniel Machon Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Link: https://lore.kernel.org/r/20240219080043.1561014-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 3423c95cc84ae..7031f41287e09 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -744,6 +744,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 7a83222caa737..cb3173d2b0e8d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -278,6 +278,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc2..ac7e1cffbcecf 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; -- GitLab From f556a352fdb2489d50f1231f58b6dfbadb4e3756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:13 +0200 Subject: [PATCH 0452/2290] phonet: take correct lock to peek at the RX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3b2d9bc4d4acdf15a876eae2c0d83149250e85ba ] The receive queue is protected by its embedded spin-lock, not the socket lock, so we need the former lock here (and only that one). Fixes: 107d0d9b8d9a ("Phonet: Phonet datagram transport protocol") Reported-by: Luosili Signed-off-by: Rémi Denis-Courmont Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240218081214.4806-1-remi@remlab.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/phonet/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index ff5f49ab236ed..39a6c5713d0b2 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -35,10 +35,10 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); answ = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(answ, (int __user *)arg); case SIOCPNADDRESOURCE: -- GitLab From 9d5523e065b568e79dfaa2ea1085a5bcf74baf78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:14 +0200 Subject: [PATCH 0453/2290] phonet/pep: fix racy skb_queue_empty() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d2a894d7f487dcb894df023e9d3014cf5b93fe5 ] The receive queues are protected by their respective spin-lock, not the socket lock. This could lead to skb_peek() unexpectedly returning NULL or a pointer to an already dequeued socket buffer. Fixes: 9641458d3ec4 ("Phonet: Pipe End Point for Phonet Pipes protocol") Signed-off-by: Rémi Denis-Courmont Link: https://lore.kernel.org/r/20240218081214.4806-2-remi@remlab.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/phonet/pep.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 83ea13a50690b..607f54c23647a 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct pep_sock *pn = pep_sk(sk); @@ -930,15 +961,7 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - answ = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - answ = skb_peek(&sk->sk_receive_queue)->len; - else - answ = 0; - release_sock(sk); + answ = pep_first_packet_length(sk); ret = put_user(answ, (int __user *)arg); break; -- GitLab From 37919ef31d7c9967ce299af0151bd7d22d7e53f7 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 19 Feb 2024 14:52:54 +0100 Subject: [PATCH 0454/2290] Fix write to cloned skb in ipv6_hop_ioam() [ Upstream commit f198d933c2e4f8f89e0620fbaf1ea7eac384a0eb ] ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam(). It also updates ip6_parse_tlv() to refresh the network header pointer ("nh") when returning from ipv6_hop_ioam(). Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Reported-by: Paolo Abeni Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/exthdrs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5fa0e37305d9d..1cfdd9d950123 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -180,6 +180,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -974,6 +976,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: -- GitLab From b9196289e36cc520ad9fa2d2f4b1c54b06f364a4 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 20 Feb 2024 12:30:07 +0530 Subject: [PATCH 0455/2290] net: phy: realtek: Fix rtl8211f_config_init() for RTL8211F(D)(I)-VD-CG PHY [ Upstream commit 3489182b11d35f1944c1245fc9c4867cf622c50f ] Commit bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") extended support of the driver from the existing support for RTL8211F(D)(I)-CG PHY to the newer RTL8211F(D)(I)-VD-CG PHY. While that commit indicated that the RTL8211F_PHYCR2 register is not supported by the "VD-CG" PHY model and therefore updated the corresponding section in rtl8211f_config_init() to be invoked conditionally, the call to "genphy_soft_reset()" was left as-is, when it should have also been invoked conditionally. This is because the call to "genphy_soft_reset()" was first introduced by the commit 0a4355c2b7f8 ("net: phy: realtek: add dt property to disable CLKOUT clock") since the RTL8211F guide indicates that a PHY reset should be issued after setting bits in the PHYCR2 register. As the PHYCR2 register is not applicable to the "VD-CG" PHY model, fix the rtl8211f_config_init() function by invoking "genphy_soft_reset()" conditionally based on the presence of the "PHYCR2" register. Fixes: bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") Signed-off-by: Siddharth Vadapalli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220070007.968762-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 3d99fd6664d7a..70e52d27064ec 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -414,9 +414,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_resume(struct phy_device *phydev) -- GitLab From fd7b4f4fdc7cc00fb5e6812b67decdde33b3786e Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Fri, 19 Jan 2024 08:32:06 -0800 Subject: [PATCH 0456/2290] drm/syncobj: call drm_syncobj_fence_add_wait when WAIT_AVAILABLE flag is set [ Upstream commit 3c43177ffb54ea5be97505eb8e2690e99ac96bc9 ] When waiting for a syncobj timeline point whose fence has not yet been submitted with the WAIT_FOR_SUBMIT flag, a callback is registered using drm_syncobj_fence_add_wait and the thread is put to sleep until the timeout expires. If the fence is submitted before then, drm_syncobj_add_point will wake up the sleeping thread immediately which will proceed to wait for the fence to be signaled. However, if the WAIT_AVAILABLE flag is used instead, drm_syncobj_fence_add_wait won't get called, meaning the waiting thread will always sleep for the full timeout duration, even if the fence gets submitted earlier. If it turns out that the fence *has* been submitted by the time it eventually wakes up, it will still indicate to userspace that the wait completed successfully (it won't return -ETIME), but it will have taken much longer than it should have. To fix this, we must call drm_syncobj_fence_add_wait if *either* the WAIT_FOR_SUBMIT flag or the WAIT_AVAILABLE flag is set. The only difference being that with WAIT_FOR_SUBMIT we will also wait for the fence to be signaled after it has been submitted while with WAIT_AVAILABLE we will return immediately. IGT test patch: https://lists.freedesktop.org/archives/igt-dev/2024-January/067537.html v1 -> v2: adjust lockdep_assert_none_held_once condition (cherry picked from commit 8c44ea81634a4a337df70a32621a5f3791be23df) Fixes: 01d6c3578379 ("drm/syncobj: add support for timeline point wait v8") Signed-off-by: Erik Kurzinger Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Reviewed-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20240119163208.3723457-1-ekurzinger@nvidia.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_syncobj.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index da0145bc104a8..8f2737075dc2f 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -980,7 +980,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1049,7 +1050,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } -- GitLab From 58168005337eabef345a872be3f87d0215ff3b30 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 13 Feb 2024 01:50:50 +0100 Subject: [PATCH 0457/2290] drm/amd/display: Fix memory leak in dm_sw_fini() [ Upstream commit bae67893578d608e35691dcdfa90c4957debf1d3 ] After destroying dmub_srv, the memory associated with it is not freed, causing a memory leak: unreferenced object 0xffff896302b45800 (size 1024): comm "(udev-worker)", pid 222, jiffies 4294894636 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 6265fd77): [] kmalloc_trace+0x29d/0x340 [] dm_dmub_sw_init+0xb4/0x450 [amdgpu] [] dm_sw_init+0x15/0x2b0 [amdgpu] [] amdgpu_device_init+0x1417/0x24e0 [amdgpu] [] amdgpu_driver_load_kms+0x15/0x190 [amdgpu] [] amdgpu_pci_probe+0x187/0x4e0 [amdgpu] [] local_pci_probe+0x3e/0x90 [] pci_device_probe+0xc3/0x230 [] really_probe+0xe2/0x480 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1f/0x90 [] __driver_attach+0xce/0x1c0 [] bus_for_each_dev+0x70/0xc0 [] bus_add_driver+0x112/0x210 [] driver_register+0x55/0x100 [] do_one_initcall+0x41/0x300 Fix this by freeing dmub_srv after destroying it. Fixes: 743b9786b14a ("drm/amd/display: Hook up the DMUB service in DM") Signed-off-by: Armin Wolf Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a826c92933199..da16048bf1004 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2255,6 +2255,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } -- GitLab From b1301f15ddc26d53f7e77e76c8c7236ce4bc2a18 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 21 Feb 2024 20:27:13 +0100 Subject: [PATCH 0458/2290] i2c: imx: when being a target, mark the last read as processed [ Upstream commit 87aec499368d488c20292952d6d4be7cb9e49c5e ] When being a target, NAK from the controller means that all bytes have been transferred. So, the last byte needs also to be marked as 'processed'. Otherwise index registers of backends may not increase. Fixes: f7414cd6923f ("i2c: imx: support slave mode for imx I2C driver") Signed-off-by: Corey Minyard Tested-by: Andrew Manley Reviewed-by: Andrew Manley Reviewed-by: Oleksij Rempel [wsa: fixed comment and commit message to properly describe the case] Signed-off-by: Wolfram Sang Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-imx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index fc70920c4ddab..0c203c614197c 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -804,6 +804,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } -- GitLab From 54407d9bc5e32e51d4e8488d2cfbaa976e9127fb Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 22 Oct 2023 21:09:57 +0800 Subject: [PATCH 0459/2290] erofs: simplify compression configuration parser commit efb4fb02cef3ab410b603c8f0e1c67f61d55f542 upstream. Move erofs_load_compr_cfgs() into decompressor.c as well as introduce a callback instead of a hard-coded switch for each algorithm for simplicity. Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20231022130957.11398-1-xiang@kernel.org Stable-dep-of: 118a8cf504d7 ("erofs: fix inconsistent per-file compression format") Signed-off-by: Yue Hu Signed-off-by: Greg Kroah-Hartman --- fs/erofs/compress.h | 4 ++ fs/erofs/decompressor.c | 60 ++++++++++++++++++++++++++++-- fs/erofs/decompressor_lzma.c | 4 +- fs/erofs/internal.h | 28 ++------------ fs/erofs/super.c | 72 +++++------------------------------- 5 files changed, 76 insertions(+), 92 deletions(-) diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 26fa170090b8f..c4a3187bdb8fc 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -21,6 +21,8 @@ struct z_erofs_decompress_req { }; struct z_erofs_decompressor { + int (*config)(struct super_block *sb, struct erofs_super_block *dsb, + void *data, int size); int (*decompress)(struct z_erofs_decompress_req *rq, struct page **pagepool); char *name; @@ -93,6 +95,8 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); /* prototypes for specific algorithms */ +int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size); int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); #endif diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 0cfad74374ca9..ae3cfd018d99c 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -24,11 +24,11 @@ struct z_erofs_lz4_decompress_ctx { unsigned int oend; }; -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int size) +static int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size) { struct erofs_sb_info *sbi = EROFS_SB(sb); + struct z_erofs_lz4_cfgs *lz4 = data; u16 distance; if (lz4) { @@ -374,17 +374,71 @@ static struct z_erofs_decompressor decompressors[] = { .name = "interlaced" }, [Z_EROFS_COMPRESSION_LZ4] = { + .config = z_erofs_load_lz4_config, .decompress = z_erofs_lz4_decompress, .name = "lz4" }, #ifdef CONFIG_EROFS_FS_ZIP_LZMA [Z_EROFS_COMPRESSION_LZMA] = { + .config = z_erofs_load_lzma_config, .decompress = z_erofs_lzma_decompress, .name = "lzma" }, #endif }; +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + unsigned int algs, alg; + erofs_off_t offset; + int size, ret = 0; + + if (!erofs_sb_has_compr_cfgs(sbi)) { + sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4; + return z_erofs_load_lz4_config(sb, dsb, NULL, 0); + } + + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { + erofs_err(sb, "unidentified algorithms %x, please upgrade kernel", + sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); + return -EOPNOTSUPP; + } + + offset = EROFS_SUPER_OFFSET + sbi->sb_size; + alg = 0; + for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + void *data; + + if (!(algs & 1)) + continue; + + data = erofs_read_metadata(sb, &buf, &offset, &size); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + break; + } + + if (alg >= ARRAY_SIZE(decompressors) || + !decompressors[alg].config) { + erofs_err(sb, "algorithm %d isn't enabled on this kernel", + alg); + ret = -EOPNOTSUPP; + } else { + ret = decompressors[alg].config(sb, + dsb, data, size); + } + + kfree(data); + if (ret) + break; + } + erofs_put_metabuf(&buf); + return ret; +} + int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 49addc345aebe..970464c4b6769 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -72,10 +72,10 @@ int z_erofs_lzma_init(void) } int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) + struct erofs_super_block *dsb, void *data, int size) { static DEFINE_MUTEX(lzma_resize_mutex); + struct z_erofs_lzma_cfgs *lzma = data; unsigned int dict_size, i; struct z_erofs_lzma *strm, *head = NULL; int err; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d8d09fc3ed655..79a7a5815ea63 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -471,6 +471,8 @@ struct erofs_map_dev { /* data.c */ extern const struct file_operations erofs_file_fops; +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp); void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, struct inode *inode, @@ -565,9 +567,7 @@ void z_erofs_exit_zip_subsystem(void); int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); int erofs_try_to_free_cached_page(struct page *page); -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len); +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} @@ -575,36 +575,14 @@ static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} -static inline int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len) -{ - if (lz4 || dsb->u1.lz4_max_distance) { - erofs_err(sb, "lz4 algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA int z_erofs_lzma_init(void); void z_erofs_lzma_exit(void); -int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size); #else static inline int z_erofs_lzma_init(void) { return 0; } static inline int z_erofs_lzma_exit(void) { return 0; } -static inline int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) { - if (lzma) { - erofs_err(sb, "lzma algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ /* flags for erofs_fscache_register_cookie() */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index bd8bf8fc2f5df..f2647126cb2fb 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -126,8 +126,8 @@ static bool check_layout_compatibility(struct super_block *sb, #ifdef CONFIG_EROFS_FS_ZIP /* read variable-sized metadata, offset will be aligned by 4-byte */ -static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, - erofs_off_t *offset, int *lengthp) +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp) { u8 *buffer, *ptr; int len, i, cnt; @@ -159,64 +159,15 @@ static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, } return buffer; } - -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - unsigned int algs, alg; - erofs_off_t offset; - int size, ret = 0; - - sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); - if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { - erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", - sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); - return -EINVAL; - } - - offset = EROFS_SUPER_OFFSET + sbi->sb_size; - alg = 0; - for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { - void *data; - - if (!(algs & 1)) - continue; - - data = erofs_read_metadata(sb, &buf, &offset, &size); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - - switch (alg) { - case Z_EROFS_COMPRESSION_LZ4: - ret = z_erofs_load_lz4_config(sb, dsb, data, size); - break; - case Z_EROFS_COMPRESSION_LZMA: - ret = z_erofs_load_lzma_config(sb, dsb, data, size); - break; - default: - DBG_BUGON(1); - ret = -EFAULT; - } - kfree(data); - if (ret) - break; - } - erofs_put_metabuf(&buf); - return ret; -} #else -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) +static int z_erofs_parse_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) { - if (dsb->u1.available_compr_algs) { - erofs_err(sb, "try to load compressed fs when compression is disabled"); - return -EINVAL; - } - return 0; + if (!dsb->u1.available_compr_algs) + return 0; + + erofs_err(sb, "compression disabled, unable to mount compressed EROFS"); + return -EOPNOTSUPP; } #endif @@ -398,10 +349,7 @@ static int erofs_read_superblock(struct super_block *sb) } /* parse on-disk compression configurations */ - if (erofs_sb_has_compr_cfgs(sbi)) - ret = erofs_load_compr_cfgs(sb, dsb); - else - ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); + ret = z_erofs_parse_cfgs(sb, dsb); if (ret < 0) goto out; -- GitLab From 47467e04816cb297905c0f09bc2d11ef865942d9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 13 Jan 2024 23:06:02 +0800 Subject: [PATCH 0460/2290] erofs: fix inconsistent per-file compression format commit 118a8cf504d7dfa519562d000f423ee3ca75d2c4 upstream. EROFS can select compression algorithms on a per-file basis, and each per-file compression algorithm needs to be marked in the on-disk superblock for initialization. However, syzkaller can generate inconsistent crafted images that use an unsupported algorithmtype for specific inodes, e.g. use MicroLZMA algorithmtype even it's not set in `sbi->available_compr_algs`. This can lead to an unexpected "BUG: kernel NULL pointer dereference" if the corresponding decompressor isn't built-in. Fix this by checking against `sbi->available_compr_algs` for each m_algorithmformat request. Incorrect !erofs_sb_has_compr_cfgs preset bitmap is now fixed together since it was harmless previously. Reported-by: Fixes: 8f89926290c4 ("erofs: get compression algorithms directly on mapping") Fixes: 622ceaddb764 ("erofs: lzma compression support") Reviewed-by: Yue Hu Link: https://lore.kernel.org/r/20240113150602.1471050-1-hsiangkao@linux.alibaba.com Signed-off-by: Gao Xiang Signed-off-by: Gao Xiang Signed-off-by: Yue Hu Signed-off-by: Greg Kroah-Hartman --- fs/erofs/decompressor.c | 2 +- fs/erofs/zmap.c | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index ae3cfd018d99c..1eefa4411e066 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -396,7 +396,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) int size, ret = 0; if (!erofs_sb_has_compr_cfgs(sbi)) { - sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4; + sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4; return z_erofs_load_lz4_config(sb, dsb, NULL, 0); } diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0337b70b2dac4..abcded1acd194 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -610,7 +610,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, .map = map, }; int err = 0; - unsigned int lclusterbits, endoff; + unsigned int lclusterbits, endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; @@ -700,17 +700,20 @@ static int z_erofs_do_map_blocks(struct inode *inode, err = -EFSCORRUPTED; goto unmap_out; } - if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) - map->m_algorithmformat = - Z_EROFS_COMPRESSION_INTERLACED; - else - map->m_algorithmformat = - Z_EROFS_COMPRESSION_SHIFTED; - } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { - map->m_algorithmformat = vi->z_algorithmtype[1]; + afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? + Z_EROFS_COMPRESSION_INTERLACED : + Z_EROFS_COMPRESSION_SHIFTED; } else { - map->m_algorithmformat = vi->z_algorithmtype[0]; + afmt = m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 ? + vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; + if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { + erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + afmt, vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } } + map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && -- GitLab From 7ebeee513f8f2abdfe98632afdba0b8ebe44339b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:24 -0800 Subject: [PATCH 0461/2290] mm/damon/reclaim: fix quota stauts loss due to online tunings commit 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 upstream. Patch series "mm/damon: fix quota status loss due to online tunings". DAMON_RECLAIM and DAMON_LRU_SORT is not preserving internal quota status when applying new user parameters, and hence could cause temporal quota accuracy degradation. Fix it by preserving the status. This patch (of 2): For online parameters change, DAMON_RECLAIM creates new scheme based on latest values of the parameters and replaces the old scheme with the new one. When creating it, the internal status of the quota of the old scheme is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240216194025.9207-2-sj@kernel.org Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/reclaim.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 162c9b1ca00fd..cc337e94acfda 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -141,9 +141,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; int err = 0; err = damon_set_attrs(ctx, &damon_reclaim_mon_attrs); @@ -154,6 +165,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } damon_set_schemes(ctx, &scheme, 1); return damon_set_region_biggest_system_ram_default(target, -- GitLab From 18f614369def2a11a52f569fe0f910b199d13487 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:38 -0800 Subject: [PATCH 0462/2290] fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio commit b820de741ae48ccf50dd95e297889c286ff4f760 upstream. If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the following kernel warning appears: WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 Call trace: kiocb_set_cancel_fn+0x9c/0xa8 ffs_epfile_read_iter+0x144/0x1d0 io_read+0x19c/0x498 io_issue_sqe+0x118/0x27c io_submit_sqes+0x25c/0x5fc __arm64_sys_io_uring_enter+0x104/0xab0 invoke_syscall+0x58/0x11c el0_svc_common+0xb4/0xf4 do_el0_svc+0x2c/0xb0 el0_svc+0x2c/0xa4 el0t_64_sync_handler+0x68/0xb4 el0t_64_sync+0x1a4/0x1a8 Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is submitted by libaio. Suggested-by: Jens Axboe Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org Signed-off-by: Christian Brauner Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index e85ba0b77f596..849c3e3ed558b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -595,6 +595,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) struct kioctx *ctx = req->ki_ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; @@ -1476,7 +1483,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a1911dcf834b..67313881f8ac1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -337,6 +337,8 @@ enum rw_hint { #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) struct kiocb { struct file *ki_filp; -- GitLab From 14f1992430ef9e647b02aa8ca12c5bcb9a1dffea Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 25 Jan 2024 08:51:27 +0000 Subject: [PATCH 0463/2290] mm: zswap: fix missing folio cleanup in writeback race path commit e3b63e966cac0bf78aaa1efede1827a252815a1d upstream. In zswap_writeback_entry(), after we get a folio from __read_swap_cache_async(), we grab the tree lock again to check that the swap entry was not invalidated and recycled. If it was, we delete the folio we just added to the swap cache and exit. However, __read_swap_cache_async() returns the folio locked when it is newly allocated, which is always true for this path, and the folio is ref'd. Make sure to unlock and put the folio before returning. This was discovered by code inspection, probably because this path handles a race condition that should not happen often, and the bug would not crash the system, it will only strand the folio indefinitely. Link: https://lkml.kernel.org/r/20240125085127.1327013-1-yosryahmed@google.com Fixes: 04fc7816089c ("mm: fix zswap writeback race condition") Signed-off-by: Yosry Ahmed Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Cc: Domenico Cerasuolo Cc: Signed-off-by: Andrew Morton Signed-off-by: Yosry Ahmed Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index b3829ada4a413..b7cb126797f9e 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1013,6 +1013,8 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); + unlock_page(page); + put_page(page); ret = -ENOMEM; goto fail; } -- GitLab From b03bca85617b95965a1af5722e452775a1ab8f32 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:04 -0700 Subject: [PATCH 0464/2290] mptcp: userspace pm send RM_ADDR for ID 0 commit 84c531f54ad9a124a924c9505d74e33d16965146 upstream. This patch adds the ability to send RM_ADDR for local ID 0. Check whether id 0 address is removed, if not, put id 0 into a removing list, pass it to mptcp_pm_remove_addr() to remove id 0 address. There is no reason not to allow the userspace to remove the initial address (ID 0). This special case was not taken into account not letting the userspace to delete all addresses as announced. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/379 Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-3-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2895be3046f79..631fa104617c3 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -222,6 +222,40 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (subflow->local_id == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -253,6 +287,11 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) goto remove_err; } + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto remove_err; + } + lock_sock((struct sock *)msk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { -- GitLab From 70a4a26572013738ee9b4d0e18487ef22f97be7c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:29 +0100 Subject: [PATCH 0465/2290] mptcp: add needs_id for netlink appending addr commit 584f3894262634596532cf43a5e782e34a0ce374 upstream. Just the same as userspace PM, a new parameter needs_id is added for in-kernel PM mptcp_pm_nl_append_new_local_addr() too. Add a new helper mptcp_pm_has_addr_attr_id() to check whether an address ID is set from PM or not. In mptcp_pm_nl_get_local_id(), needs_id is always true, but in mptcp_pm_nl_add_addr_doit(), pass mptcp_pm_has_addr_attr_id() to needs_it. Fixes: efd5a4c04e18 ("mptcp: add the address ID assignment bitmap") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 980050f6b456f..70a1025f093cf 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -900,7 +900,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -942,7 +943,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -953,7 +954,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1095,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1311,6 +1312,18 @@ next: return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_addr_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1352,7 +1365,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); goto out_free; -- GitLab From 4a37c6c068345ca2e4001e47555f5031872361d2 Mon Sep 17 00:00:00 2001 From: Szuying Chen Date: Thu, 21 Sep 2023 17:33:51 +0800 Subject: [PATCH 0466/2290] ata: ahci: add identifiers for ASM2116 series adapters commit 3bf6141060948e27b62b13beb216887f2e54591e upstream. Add support for PCIe SATA adapter cards based on Asmedia 2116 controllers. These cards can provide up to 10 SATA ports on PCIe card. Signed-off-by: Szuying Chen Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 42c6b660550c2..6da5005c2e268 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -613,6 +613,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ + { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ + { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ + { PCI_VDEVICE(ASMEDIA, 0x1165), board_ahci }, /* ASM1165 */ + { PCI_VDEVICE(ASMEDIA, 0x1166), board_ahci }, /* ASM1166 */ /* * Samsung SSDs found on some macbooks. NCQ times out if MSI is -- GitLab From f8fc1f1d9f65b89e7b90ffea5dfd58d427e48017 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 30 Jan 2024 15:21:51 +0200 Subject: [PATCH 0467/2290] ahci: Extend ASM1061 43-bit DMA address quirk to other ASM106x parts commit 51af8f255bdaca6d501afc0d085b808f67b44d91 upstream. ASMedia have confirmed that all ASM106x parts currently listed in ahci_pci_tbl[] suffer from the 43-bit DMA address limitation that we ran into on the ASM1061, and therefore, we need to apply the quirk added by commit 20730e9b2778 ("ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers") to the other supported ASM106x parts as well. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-ide/ZbopwKZJAKQRA4Xv@x1-carbon/ Signed-off-by: Lennert Buytenhek [cassel: add link to ASMedia confirmation email] Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6da5005c2e268..1790a2ecb9fac 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -606,13 +606,13 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* ASMedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ -- GitLab From f119f2325ba70cbfdec701000dcad4d88805d5b0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Feb 2024 15:05:16 -0800 Subject: [PATCH 0468/2290] arp: Prevent overflow in arp_req_get(). commit a7d6027790acea24446ddd6632d394096c0f4667 upstream. syzkaller reported an overflown write in arp_req_get(). [0] When ioctl(SIOCGARP) is issued, arp_req_get() looks up an neighbour entry and copies neigh->ha to struct arpreq.arp_ha.sa_data. The arp_ha here is struct sockaddr, not struct sockaddr_storage, so the sa_data buffer is just 14 bytes. In the splat below, 2 bytes are overflown to the next int field, arp_flags. We initialise the field just after the memcpy(), so it's not a problem. However, when dev->addr_len is greater than 22 (e.g. MAX_ADDR_LEN), arp_netmask is overwritten, which could be set as htonl(0xFFFFFFFFUL) in arp_ioctl() before calling arp_req_get(). To avoid the overflow, let's limit the max length of memcpy(). Note that commit b5f0de6df6dc ("net: dev: Convert sa_data to flexible array in struct sockaddr") just silenced syzkaller. [0]: memcpy: detected field-spanning write (size 16) of single field "r->arp_ha.sa_data" at net/ipv4/arp.c:1128 (size 14) WARNING: CPU: 0 PID: 144638 at net/ipv4/arp.c:1128 arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Modules linked in: CPU: 0 PID: 144638 Comm: syz-executor.4 Not tainted 6.1.74 #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 RIP: 0010:arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Code: fd ff ff e8 41 42 de fb b9 0e 00 00 00 4c 89 fe 48 c7 c2 20 6d ab 87 48 c7 c7 80 6d ab 87 c6 05 25 af 72 04 01 e8 5f 8d ad fb <0f> 0b e9 6c fd ff ff e8 13 42 de fb be 03 00 00 00 4c 89 e7 e8 a6 RSP: 0018:ffffc900050b7998 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88803a815000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8641a44a RDI: 0000000000000001 RBP: ffffc900050b7a98 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 203a7970636d656d R12: ffff888039c54000 R13: 1ffff92000a16f37 R14: ffff88803a815084 R15: 0000000000000010 FS: 00007f172bf306c0(0000) GS:ffff88805aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f172b3569f0 CR3: 0000000057f12005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: arp_ioctl+0x33f/0x4b0 net/ipv4/arp.c:1261 inet_ioctl+0x314/0x3a0 net/ipv4/af_inet.c:981 sock_do_ioctl+0xdf/0x260 net/socket.c:1204 sock_ioctl+0x3ef/0x650 net/socket.c:1321 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x18e/0x220 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x7f172b262b8d Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f172bf300b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f172b3abf80 RCX: 00007f172b262b8d RDX: 0000000020000000 RSI: 0000000000008954 RDI: 0000000000000003 RBP: 00007f172b2d3493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f172b3abf80 R15: 00007f172bf10000 Reported-by: syzkaller Reported-by: Bjoern Doebel Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240215230516.31330-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5d..ccff96820a703 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; -- GitLab From 277439e7cabd9d4c6334b39a4b99d49b4c97265b Mon Sep 17 00:00:00 2001 From: Edward Lo Date: Thu, 27 Oct 2022 23:33:37 +0800 Subject: [PATCH 0469/2290] fs/ntfs3: Enhance the attribute size check commit 4f082a7531223a438c757bb20e304f4c941c67a8 upstream. This combines the overflow and boundary check so that all attribute size will be properly examined while enumerating them. [ 169.181521] BUG: KASAN: slab-out-of-bounds in run_unpack+0x2e3/0x570 [ 169.183161] Read of size 1 at addr ffff8880094b6240 by task mount/247 [ 169.184046] [ 169.184925] CPU: 0 PID: 247 Comm: mount Not tainted 6.0.0-rc7+ #3 [ 169.185908] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 169.187066] Call Trace: [ 169.187492] [ 169.188049] dump_stack_lvl+0x49/0x63 [ 169.188495] print_report.cold+0xf5/0x689 [ 169.188964] ? run_unpack+0x2e3/0x570 [ 169.189331] kasan_report+0xa7/0x130 [ 169.189714] ? run_unpack+0x2e3/0x570 [ 169.190079] __asan_load1+0x51/0x60 [ 169.190634] run_unpack+0x2e3/0x570 [ 169.191290] ? run_pack+0x840/0x840 [ 169.191569] ? run_lookup_entry+0xb3/0x1f0 [ 169.192443] ? mi_enum_attr+0x20a/0x230 [ 169.192886] run_unpack_ex+0xad/0x3e0 [ 169.193276] ? run_unpack+0x570/0x570 [ 169.193557] ? ni_load_mi+0x80/0x80 [ 169.193889] ? debug_smp_processor_id+0x17/0x20 [ 169.194236] ? mi_init+0x4a/0x70 [ 169.194496] attr_load_runs_vcn+0x166/0x1c0 [ 169.194851] ? attr_data_write_resident+0x250/0x250 [ 169.195188] mi_read+0x133/0x2c0 [ 169.195481] ntfs_iget5+0x277/0x1780 [ 169.196017] ? call_rcu+0x1c7/0x330 [ 169.196392] ? ntfs_get_block_bmap+0x70/0x70 [ 169.196708] ? evict+0x223/0x280 [ 169.197014] ? __kmalloc+0x33/0x540 [ 169.197305] ? wnd_init+0x15b/0x1b0 [ 169.197599] ntfs_fill_super+0x1026/0x1ba0 [ 169.197994] ? put_ntfs+0x1d0/0x1d0 [ 169.198299] ? vsprintf+0x20/0x20 [ 169.198583] ? mutex_unlock+0x81/0xd0 [ 169.198930] ? set_blocksize+0x95/0x150 [ 169.199269] get_tree_bdev+0x232/0x370 [ 169.199750] ? put_ntfs+0x1d0/0x1d0 [ 169.200094] ntfs_fs_get_tree+0x15/0x20 [ 169.200431] vfs_get_tree+0x4c/0x130 [ 169.200714] path_mount+0x654/0xfe0 [ 169.201067] ? putname+0x80/0xa0 [ 169.201358] ? finish_automount+0x2e0/0x2e0 [ 169.201965] ? putname+0x80/0xa0 [ 169.202445] ? kmem_cache_free+0x1c4/0x440 [ 169.203075] ? putname+0x80/0xa0 [ 169.203414] do_mount+0xd6/0xf0 [ 169.203719] ? path_mount+0xfe0/0xfe0 [ 169.203977] ? __kasan_check_write+0x14/0x20 [ 169.204382] __x64_sys_mount+0xca/0x110 [ 169.204711] do_syscall_64+0x3b/0x90 [ 169.205059] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 169.205571] RIP: 0033:0x7f67a80e948a [ 169.206327] Code: 48 8b 0d 11 fa 2a 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 008 [ 169.208296] RSP: 002b:00007ffddf020f58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 [ 169.209253] RAX: ffffffffffffffda RBX: 000055e2547a6060 RCX: 00007f67a80e948a [ 169.209777] RDX: 000055e2547a6260 RSI: 000055e2547a62e0 RDI: 000055e2547aeaf0 [ 169.210342] RBP: 0000000000000000 R08: 000055e2547a6280 R09: 0000000000000020 [ 169.210843] R10: 00000000c0ed0000 R11: 0000000000000202 R12: 000055e2547aeaf0 [ 169.211307] R13: 000055e2547a6260 R14: 0000000000000000 R15: 00000000ffffffff [ 169.211913] [ 169.212304] [ 169.212680] Allocated by task 0: [ 169.212963] (stack is not available) [ 169.213200] [ 169.213472] The buggy address belongs to the object at ffff8880094b5e00 [ 169.213472] which belongs to the cache UDP of size 1152 [ 169.214095] The buggy address is located 1088 bytes inside of [ 169.214095] 1152-byte region [ffff8880094b5e00, ffff8880094b6280) [ 169.214639] [ 169.215004] The buggy address belongs to the physical page: [ 169.215766] page:000000002e324c8c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x94b4 [ 169.218412] head:000000002e324c8c order:2 compound_mapcount:0 compound_pincount:0 [ 169.219078] flags: 0xfffffc0010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) [ 169.220272] raw: 000fffffc0010200 0000000000000000 dead000000000122 ffff888002409b40 [ 169.221006] raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 [ 169.222320] page dumped because: kasan: bad access detected [ 169.222922] [ 169.223119] Memory state around the buggy address: [ 169.224056] ffff8880094b6100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.224908] ffff8880094b6180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.225677] >ffff8880094b6200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.226445] ^ [ 169.227055] ffff8880094b6280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 169.227638] ffff8880094b6300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Signed-off-by: Edward Lo Signed-off-by: Konstantin Komarov Cc: "Doebel, Bjoern" Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/record.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ab03c373cec66..a8d4ed7bca025 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -226,11 +226,6 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* overflow check */ - return NULL; - } - attr = Add2Ptr(attr, asize); off += asize; } @@ -253,8 +248,8 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if ((t32 & 0xf) || (t32 > 0x100)) return NULL; - /* Check boundary. */ - if (off + asize > used) + /* Check overflow and boundary. */ + if (off + asize < off || off + asize > used) return NULL; /* Check size of attribute. */ -- GitLab From a3eb3a74aa8c94e6c8130b55f3b031f29162868c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Mar 2024 13:26:39 +0100 Subject: [PATCH 0470/2290] Linux 6.1.80 Link: https://lore.kernel.org/r/20240227131610.391465389@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Florian Fainelli Tested-by: Kelsey Steele Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Yann Sionneau Tested-by: Conor Dooley Tested-by: Shuah Khan Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: kernelci.org bot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d6bc9f597e8b8..bc4adb561a7cf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 79 +SUBLEVEL = 80 EXTRAVERSION = NAME = Curry Ramen -- GitLab From b7be6c737a179a76901c872f6b4c1d00552d9a1b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:22:18 +0200 Subject: [PATCH 0471/2290] netfilter: nf_tables: disallow timeout for anonymous sets commit e26d3009efda338f19016df4175f354a9bd0a4ab upstream. Never used from userspace, disallow these parameters. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e21ec3ad80939..d3ba947f43761 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4752,6 +4752,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4760,6 +4763,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } -- GitLab From ae5f10ed9539878f1128f3fa129f104ba97ffc86 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 30 May 2023 09:38:09 +0200 Subject: [PATCH 0472/2290] drm/meson: fix unbind path if HDMI fails to bind [ Upstream commit 6a044642988b5f8285f3173b8e88784bef2bc306 ] If the case the HDMI controller fails to bind, we try to unbind all components before calling drm_dev_put() which makes drm_bridge_detach() crash because unbinding the HDMI controller frees the bridge memory. The solution is the unbind all components at the end like in the remove path. Reviewed-by: Nicolas Belin Tested-by: Nicolas Belin Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230512-amlogic-v6-4-upstream-dsi-ccf-vim3-v5-8-56eb7a4d5b8e@linaro.org Stable-dep-of: bd915ae73a2d ("drm/meson: Don't remove bridges which are created by other drivers") Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_drv.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 119544d88b586..fbac39aa38cc4 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) goto exit_afbcd; if (has_components) { - ret = component_bind_all(drm->dev, drm); + ret = component_bind_all(dev, drm); if (ret) { dev_err(drm->dev, "Couldn't bind all components\n"); + /* Do not try to unbind */ + has_components = false; goto exit_afbcd; } } ret = meson_encoder_hdmi_init(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_plane_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_overlay_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_crtc_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto unbind_all; + goto exit_afbcd; drm_mode_config_reset(drm); @@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); -unbind_all: - if (has_components) - component_unbind_all(drm->dev, drm); exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); free_drm: drm_dev_put(drm); + meson_encoder_hdmi_remove(priv); + meson_encoder_cvbs_remove(priv); + + if (has_components) + component_unbind_all(dev, drm); + return ret; } -- GitLab From 7d34b1078665e171f4883b8675e52d17ebfc5c64 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 15 Feb 2024 23:04:42 +0100 Subject: [PATCH 0473/2290] drm/meson: Don't remove bridges which are created by other drivers [ Upstream commit bd915ae73a2d78559b376ad2caf5e4ef51de2455 ] Stop calling drm_bridge_remove() for bridges allocated/managed by other drivers in the remove paths of meson_encoder_{cvbs,dsi,hdmi}. drm_bridge_remove() unregisters the bridge so it cannot be used anymore. Doing so for bridges we don't own can lead to the video pipeline not being able to come up after -EPROBE_DEFER of the VPU because we're unregistering a bridge that's managed by another driver. The other driver doesn't know that we have unregistered it's bridge and on subsequent .probe() we're not able to find those bridges anymore (since nobody re-creates them). This fixes probe errors on Meson8b boards with the CVBS outputs enabled. Fixes: 09847723c12f ("drm/meson: remove drm bridges at aggregate driver unbind time") Fixes: 42dcf15f901c ("drm/meson: add DSI encoder") Cc: Reported-by: Steve Morvai Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Tested-by: Steve Morvai Link: https://lore.kernel.org/r/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_encoder_cvbs.c | 1 - drivers/gpu/drm/meson/meson_encoder_hdmi.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e3..3407450435e20 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index b14e6e507c61b..03062e7a02b64 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } -- GitLab From cf33e6ca12d814e1be2263cb76960d0019d7fb94 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 29 Dec 2022 13:01:40 -0600 Subject: [PATCH 0474/2290] scsi: core: Add struct for args to execution functions [ Upstream commit d0949565811f0896c1c7e781ab2ad99d34273fdf ] Move the SCSI execution functions to use a struct for passing in optional args. This commit adds the new struct, temporarily converts scsi_execute() and scsi_execute_req() ands a new helper, scsi_execute_cmd(), which takes the scsi_exec_args struct. There should be no change in behavior. We no longer allow users to pass in any request->rq_flags value, but they were only passing in RQF_PM which we do support by allowing users to pass in the BLK_MQ_REQ flags used by blk_mq_alloc_request(). Subsequent commits will convert scsi_execute() and scsi_execute_req() users to the new helpers then remove scsi_execute() and scsi_execute_req(). Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Stable-dep-of: 321da3dc1f3c ("scsi: sd: usb_storage: uas: Access media prior to querying device properties") Signed-off-by: Sasha Levin --- drivers/scsi/scsi_lib.c | 52 ++++++++++++++++++-------------------- include/scsi/scsi_device.h | 51 +++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5c5954b78585e..edd296f950a33 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } - /** - * __scsi_execute - insert request and wait for the result - * @sdev: scsi device + * scsi_execute_cmd - insert request and wait for the result + * @sdev: scsi_device * @cmd: scsi command - * @data_direction: data direction + * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer - * @sense: optional sense buffer - * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @flags: flags for ->cmd_flags - * @rq_flags: flags for ->rq_flags - * @resid: optional residual length + * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid) +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args) { + static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - req = scsi_alloc_request(sdev->request_queue, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); + if (!args) + args = &default_args; + else if (WARN_ON_ONCE(args->sense && + args->sense_len != SCSI_SENSE_BUFFERSIZE)) + return -EINVAL; + + req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = retries; req->timeout = timeout; - req->cmd_flags |= flags; - req->rq_flags |= rq_flags | RQF_QUIET; + req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (resid) - *resid = scmd->resid_len; - if (sense && scmd->sense_len) - memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (sshdr) + if (args->resid) + *args->resid = scmd->resid_len; + if (args->sense) + memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - sshdr); + args->sshdr); + ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(__scsi_execute); +EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d2751ed536df2..b407807cc6695 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -479,28 +479,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); -extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid); + +/* Optional arguments to scsi_execute_cmd */ +struct scsi_exec_args { + unsigned char *sense; /* sense buffer */ + unsigned int sense_len; /* sense buffer len */ + struct scsi_sense_hdr *sshdr; /* decoded sense header */ + blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ + int *resid; /* residual length */ +}; + +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args); + /* Make sure any sense buffer is the correct size. */ -#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ - sshdr, timeout, retries, flags, rq_flags, resid) \ +#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ + _sshdr, _timeout, _retries, _flags, _rq_flags, \ + _resid) \ ({ \ - BUILD_BUG_ON((sense) != NULL && \ - sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ - __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ - sense, sshdr, timeout, retries, flags, rq_flags, \ - resid); \ + scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ + REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ + _buffer, _bufflen, _timeout, _retries, \ + &(struct scsi_exec_args) { \ + .sense = _sense, \ + .sshdr = _sshdr, \ + .req_flags = _rq_flags & RQF_PM ? \ + BLK_MQ_REQ_PM : 0, \ + .resid = _resid, \ + }); \ }) + static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute(sdev, cmd, data_direction, buffer, - bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); + return scsi_execute_cmd(sdev, cmd, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, + bufflen, timeout, retries, + &(struct scsi_exec_args) { + .sshdr = sshdr, + .resid = resid, + }); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); -- GitLab From b73dd5f9997279715cd450ee8ca599aaff2eabb9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH 0475/2290] scsi: sd: usb_storage: uas: Access media prior to querying device properties [ Upstream commit 321da3dc1f3c92a12e3c5da934090d2992a8814c ] It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- drivers/usb/storage/scsiglue.c | 7 +++++++ drivers/usb/storage/uas.c | 7 +++++++ include/scsi/scsi_device.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 31b5273f43a71..4433b02c8935f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da536..12cf9940e5b67 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index de3836412bf32..ed22053b3252f 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b407807cc6695..a64713fe52640 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- GitLab From 2a3d40b4025fcfe51b04924979f1653993b17669 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 9 Feb 2024 14:04:53 -0800 Subject: [PATCH 0476/2290] af_unix: Fix task hung while purging oob_skb in GC. [ Upstream commit 25236c91b5ab4a26a56ba2e79b8060cf4e047839 ] syzbot reported a task hung; at the same time, GC was looping infinitely in list_for_each_entry_safe() for OOB skb. [0] syzbot demonstrated that the list_for_each_entry_safe() was not actually safe in this case. A single skb could have references for multiple sockets. If we free such a skb in the list_for_each_entry_safe(), the current and next sockets could be unlinked in a single iteration. unix_notinflight() uses list_del_init() to unlink the socket, so the prefetched next socket forms a loop itself and list_for_each_entry_safe() never stops. Here, we must use while() and make sure we always fetch the first socket. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 5065 Comm: syz-executor236 Not tainted 6.8.0-rc3-syzkaller-00136-g1f719a2f3fa6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:preempt_count arch/x86/include/asm/preempt.h:26 [inline] RIP: 0010:check_kcov_mode kernel/kcov.c:173 [inline] RIP: 0010:__sanitizer_cov_trace_pc+0xd/0x60 kernel/kcov.c:207 Code: cc cc cc cc 66 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 65 48 8b 14 25 40 c2 03 00 <65> 8b 05 b4 7c 78 7e a9 00 01 ff 00 48 8b 34 24 74 0f f6 c4 01 74 RSP: 0018:ffffc900033efa58 EFLAGS: 00000283 RAX: ffff88807b077800 RBX: ffff88807b077800 RCX: 1ffffffff27b1189 RDX: ffff88802a5a3b80 RSI: ffffffff8968488d RDI: ffff88807b077f70 RBP: ffffc900033efbb0 R08: 0000000000000001 R09: fffffbfff27a900c R10: ffffffff93d48067 R11: ffffffff8ae000eb R12: ffff88807b077800 R13: dffffc0000000000 R14: ffff88807b077e40 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564f4fc1e3a8 CR3: 000000000d57a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unix_gc+0x563/0x13b0 net/unix/garbage.c:319 unix_release_sock+0xa93/0xf80 net/unix/af_unix.c:683 unix_release+0x91/0xf0 net/unix/af_unix.c:1064 __sock_release+0xb0/0x270 net/socket.c:659 sock_close+0x1c/0x30 net/socket.c:1421 __fput+0x270/0xb80 fs/file_table.c:376 task_work_run+0x14f/0x250 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa8a/0x2ad0 kernel/exit.c:871 do_group_exit+0xd4/0x2a0 kernel/exit.c:1020 __do_sys_exit_group kernel/exit.c:1031 [inline] __se_sys_exit_group kernel/exit.c:1029 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1029 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd5/0x270 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f9d6cbdac09 Code: Unable to access opcode bytes at 0x7f9d6cbdabdf. RSP: 002b:00007fff5952feb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9d6cbdac09 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 RBP: 00007f9d6cc552b0 R08: ffffffffffffffb8 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 00007f9d6cc552b0 R13: 0000000000000000 R14: 00007f9d6cc55d00 R15: 00007f9d6cbabe70 Reported-by: syzbot+4fa4a2d1f5a5ee06f006@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4fa4a2d1f5a5ee06f006 Fixes: 1279f9d9dec2 ("af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240209220453.96053-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/garbage.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 767b338a7a2d4..9e1bab97c05ba 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -315,10 +315,11 @@ void unix_gc(void) __skb_queue_purge(&hitlist); #if IS_ENABLED(CONFIG_AF_UNIX_OOB) - list_for_each_entry_safe(u, next, &gc_candidates, link) { - struct sk_buff *skb = u->oob_skb; + while (!list_empty(&gc_candidates)) { + u = list_entry(gc_candidates.next, struct unix_sock, link); + if (u->oob_skb) { + struct sk_buff *skb = u->oob_skb; - if (skb) { u->oob_skb = NULL; kfree_skb(skb); } -- GitLab From a76072bc73c77cbdc6c77e5893376939894e6f73 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Sep 2023 11:35:39 +0200 Subject: [PATCH 0477/2290] of: overlay: Reorder struct fragment fields kerneldoc [ Upstream commit 5d007ffdf6025fe83e497c44ed7c8aa8f150c4d1 ] The fields of the fragment structure were reordered, but the kerneldoc was not updated. Fixes: 81225ea682f45629 ("of: overlay: reorder fields in struct fragment") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/cfa36d2bb95e3c399c415dbf58057302c70ef375.1695893695.git.geert+renesas@glider.be Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 4402871b5c0c0..e663d5585a057 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -45,8 +45,8 @@ struct target { /** * struct fragment - info about fragment nodes in overlay expanded device tree - * @target: target of the overlay operation * @overlay: pointer to the __overlay__ node + * @target: target of the overlay operation */ struct fragment { struct device_node *overlay; -- GitLab From 00459ae532d6f1e7c720b5a331f40f72cf158dca Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 16:10:51 -0600 Subject: [PATCH 0478/2290] net: restore alpha order to Ethernet devices in config [ Upstream commit a1331535aeb41b08fe0c2c78af51885edc93615b ] The filename "wangxun" sorts between "intel" and "xscale", but xscale/Kconfig contains "Intel XScale" prompts, so Wangxun ends up in the wrong place in the config front-ends. Move wangxun/Kconfig so the Wangxun devices appear in order in the user interface. Fixes: 3ce7547e5b71 ("net: txgbe: Add build support for txgbe") Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230307221051.890135-1-helgaas@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 1917da7841919..5a274b99f2992 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" -source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig" source "drivers/net/ethernet/tundra/Kconfig" source "drivers/net/ethernet/vertexcom/Kconfig" source "drivers/net/ethernet/via/Kconfig" +source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/wiznet/Kconfig" source "drivers/net/ethernet/xilinx/Kconfig" source "drivers/net/ethernet/xircom/Kconfig" -- GitLab From 174ac6b53a20cc7f466eead68ccee55ab633e5a1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 6 Feb 2023 16:39:20 +0100 Subject: [PATCH 0479/2290] mlxsw: spectrum_acl_tcam: Make fini symmetric to init [ Upstream commit 61fe3b9102ac84ba479ab84d8f5454af2e21e468 ] Move mutex_destroy() to the end to make the function symmetric with mlxsw_sp_acl_tcam_init(). No functional changes. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index dc2e204bcd727..2107de4e9d99b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -86,10 +86,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - mutex_destroy(&tcam->lock); ops->fini(mlxsw_sp, tcam->priv); bitmap_free(tcam->used_groups); bitmap_free(tcam->used_regions); + mutex_destroy(&tcam->lock); } int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, -- GitLab From 5dbedec7e5cf668caa0d76e02915eef16d22e97f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 6 Feb 2023 16:39:19 +0100 Subject: [PATCH 0480/2290] mlxsw: spectrum_acl_tcam: Add missing mutex_destroy() [ Upstream commit 65823e07b1e4055b6278725fd92f4d7e6f8d53fd ] Pair mutex_init() with a mutex_destroy() in the error path. Found during code review. No functional changes. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 2107de4e9d99b..41eac7dfb67e7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, max_regions = max_tcam_regions; tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) - return -ENOMEM; + if (!tcam->used_regions) { + err = -ENOMEM; + goto err_alloc_used_regions; + } tcam->max_regions = max_regions; max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); @@ -78,6 +80,8 @@ err_tcam_init: bitmap_free(tcam->used_groups); err_alloc_used_groups: bitmap_free(tcam->used_regions); +err_alloc_used_regions: + mutex_destroy(&tcam->lock); return err; } -- GitLab From e30f82597bf64ad32f3b9718bb12791bf3926f3d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 15 May 2023 11:10:49 -0400 Subject: [PATCH 0481/2290] PCI: layerscape: Add the endpoint linkup notifier support [ Upstream commit 061cbfab09fb35898f2907d42f936cf9ae271d93 ] Layerscape has PME interrupt, which can be used as linkup notifier. Set CFG_READY bit of PEX_PF0_CONFIG to enable accesses from root complex when linkup detected. Link: https://lore.kernel.org/r/20230515151049.2797105-1-Frank.Li@nxp.com Signed-off-by: Xiaowei Bao Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- .../pci/controller/dwc/pci-layerscape-ep.c | 100 +++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index ad99707b3b994..5b27554e071a1 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -18,6 +18,20 @@ #include "pcie-designware.h" +#define PEX_PF0_CONFIG 0xC0014 +#define PEX_PF0_CFG_READY BIT(0) + +/* PEX PFa PCIE PME and message interrupt registers*/ +#define PEX_PF0_PME_MES_DR 0xC0020 +#define PEX_PF0_PME_MES_DR_LUD BIT(7) +#define PEX_PF0_PME_MES_DR_LDD BIT(9) +#define PEX_PF0_PME_MES_DR_HRD BIT(10) + +#define PEX_PF0_PME_MES_IER 0xC0028 +#define PEX_PF0_PME_MES_IER_LUDIE BIT(7) +#define PEX_PF0_PME_MES_IER_LDDIE BIT(9) +#define PEX_PF0_PME_MES_IER_HRDIE BIT(10) + #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) struct ls_pcie_ep_drvdata { @@ -30,8 +44,84 @@ struct ls_pcie_ep { struct dw_pcie *pci; struct pci_epc_features *ls_epc; const struct ls_pcie_ep_drvdata *drvdata; + int irq; + bool big_endian; }; +static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + return ioread32be(pci->dbi_base + offset); + else + return ioread32(pci->dbi_base + offset); +} + +static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + iowrite32be(value, pci->dbi_base + offset); + else + iowrite32(value, pci->dbi_base + offset); +} + +static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) +{ + struct ls_pcie_ep *pcie = dev_id; + struct dw_pcie *pci = pcie->pci; + u32 val, cfg; + + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); + ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); + + if (!val) + return IRQ_NONE; + + if (val & PEX_PF0_PME_MES_DR_LUD) { + cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); + cfg |= PEX_PF0_CFG_READY; + ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); + dw_pcie_ep_linkup(&pci->ep); + + dev_dbg(pci->dev, "Link up\n"); + } else if (val & PEX_PF0_PME_MES_DR_LDD) { + dev_dbg(pci->dev, "Link down\n"); + } else if (val & PEX_PF0_PME_MES_DR_HRD) { + dev_dbg(pci->dev, "Hot reset\n"); + } + + return IRQ_HANDLED; +} + +static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie, + struct platform_device *pdev) +{ + u32 val; + int ret; + + pcie->irq = platform_get_irq_byname(pdev, "pme"); + if (pcie->irq < 0) + return pcie->irq; + + ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler, + IRQF_SHARED, pdev->name, pcie); + if (ret) { + dev_err(&pdev->dev, "Can't register PCIe IRQ\n"); + return ret; + } + + /* Enable interrupts */ + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER); + val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE | + PEX_PF0_PME_MES_IER_LUDIE; + ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val); + + return 0; +} + static const struct pci_epc_features* ls_pcie_ep_get_features(struct dw_pcie_ep *ep) { @@ -124,6 +214,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) struct ls_pcie_ep *pcie; struct pci_epc_features *ls_epc; struct resource *dbi_base; + int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -143,6 +234,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ops = pcie->drvdata->dw_pcie_ops; ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4); + ls_epc->linkup_notifier = true; pcie->pci = pci; pcie->ls_epc = ls_epc; @@ -154,9 +246,15 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ep.ops = &ls_pcie_ep_ops; + pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian"); + platform_set_drvdata(pdev, pcie); - return dw_pcie_ep_init(&pci->ep); + ret = dw_pcie_ep_init(&pci->ep); + if (ret) + return ret; + + return ls_pcie_ep_interrupt_init(pcie, pdev); } static struct platform_driver ls_pcie_ep_driver = { -- GitLab From 507eeaad4d32174640440f225a30112d8cccd374 Mon Sep 17 00:00:00 2001 From: Xiaowei Bao Date: Thu, 20 Jul 2023 09:58:34 -0400 Subject: [PATCH 0482/2290] PCI: layerscape: Add workaround for lost link capabilities during reset [ Upstream commit 17cf8661ee0f065c08152e611a568dd1fb0285f1 ] The endpoint controller loses the Maximum Link Width and Supported Link Speed value from the Link Capabilities Register - initially configured by the Reset Configuration Word (RCW) - during a link-down or hot reset event. Address this issue in the endpoint event handler. Link: https://lore.kernel.org/r/20230720135834.1977616-2-Frank.Li@nxp.com Fixes: a805770d8a22 ("PCI: layerscape: Add EP mode support") Signed-off-by: Xiaowei Bao Signed-off-by: Hou Zhiqiang Signed-off-by: Frank Li Signed-off-by: Lorenzo Pieralisi Acked-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- .../pci/controller/dwc/pci-layerscape-ep.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index 5b27554e071a1..dd7d74fecc48e 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -45,6 +45,7 @@ struct ls_pcie_ep { struct pci_epc_features *ls_epc; const struct ls_pcie_ep_drvdata *drvdata; int irq; + u32 lnkcap; bool big_endian; }; @@ -73,6 +74,7 @@ static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) struct ls_pcie_ep *pcie = dev_id; struct dw_pcie *pci = pcie->pci; u32 val, cfg; + u8 offset; val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); @@ -81,6 +83,19 @@ static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) return IRQ_NONE; if (val & PEX_PF0_PME_MES_DR_LUD) { + + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + + /* + * The values of the Maximum Link Width and Supported Link + * Speed from the Link Capabilities Register will be lost + * during link down or hot reset. Restore initial value + * that configured by the Reset Configuration Word (RCW). + */ + dw_pcie_dbi_ro_wr_en(pci); + dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap); + dw_pcie_dbi_ro_wr_dis(pci); + cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); cfg |= PEX_PF0_CFG_READY; ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); @@ -214,6 +229,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) struct ls_pcie_ep *pcie; struct pci_epc_features *ls_epc; struct resource *dbi_base; + u8 offset; int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); @@ -250,6 +266,9 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pcie); + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP); + ret = dw_pcie_ep_init(&pci->ep); if (ret) return ret; -- GitLab From 0cea0c330a11461d0fbad5347a5d68d499db56fd Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 14 Apr 2023 11:19:46 +0200 Subject: [PATCH 0483/2290] ARM: dts: imx: Adjust dma-apbh node name [ Upstream commit e9f5cd85f1f931bb7b64031492f7051187ccaac7 ] Currently the dtbs_check generates warnings like this: $nodename:0: 'dma-apbh@110000' does not match '^dma-controller(@.*)?$' So fix all affected dma-apbh node names. Signed-off-by: Stefan Wahren Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx23.dtsi | 2 +- arch/arm/boot/dts/imx28.dtsi | 2 +- arch/arm/boot/dts/imx6qdl.dtsi | 2 +- arch/arm/boot/dts/imx6sx.dtsi | 2 +- arch/arm/boot/dts/imx6ul.dtsi | 2 +- arch/arm/boot/dts/imx7s.dtsi | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index ec476b1596496..b236d23f80715 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -59,7 +59,7 @@ reg = <0x80000000 0x2000>; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx23-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <0 14 20 0 diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index b15df16ecb01a..b81592a613112 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -78,7 +78,7 @@ status = "disabled"; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx28-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <82 83 84 85 diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index ff1e0173b39be..2c6eada01d792 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -150,7 +150,7 @@ interrupt-parent = <&gpc>; ranges; - dma_apbh: dma-apbh@110000 { + dma_apbh: dma-controller@110000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x00110000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 1f1053a898fbf..67d344ae76b51 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -209,7 +209,7 @@ power-domains = <&pd_pu>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = , diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 2b5996395701a..aac081b6daaac 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -164,7 +164,7 @@ <0x00a06000 0x2000>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 4b23630fc738d..2940dacaa56fc 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1267,7 +1267,7 @@ }; }; - dma_apbh: dma-apbh@33000000 { + dma_apbh: dma-controller@33000000 { compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x33000000 0x2000>; interrupts = , -- GitLab From 49e734926a4b07308d98dc9d3c8f05eb77f1da00 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 17 Dec 2022 02:08:53 +0100 Subject: [PATCH 0484/2290] ARM: dts: imx7s: Drop dma-apb interrupt-names [ Upstream commit 9928f0a9e7c0cee3360ca1442b4001d34ad67556 ] Drop "interrupt-names" property, since it is broken. The drivers/dma/mxs-dma.c in Linux kernel does not use it, the property contains duplicate array entries in existing DTs, and even malformed entries (gmpi, should have been gpmi). Get rid of that optional property altogether. Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7s.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 2940dacaa56fc..69aebc691526f 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1274,7 +1274,6 @@ , , ; - interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3"; #dma-cells = <1>; dma-channels = <4>; clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; -- GitLab From ed9fdc82cafbcf8a46b55d315219bf9464621bca Mon Sep 17 00:00:00 2001 From: Elson Roy Serrao Date: Fri, 24 Mar 2023 14:47:57 -0700 Subject: [PATCH 0485/2290] usb: gadget: Properly configure the device for remote wakeup [ Upstream commit b93c2a68f3d9dc98ec30dcb342ae47c1c8d09d18 ] The wakeup bit in the bmAttributes field indicates whether the device is configured for remote wakeup. But this field should be allowed to set only if the UDC supports such wakeup mechanism. So configure this field based on UDC capability. Also inform the UDC whether the device is configured for remote wakeup by implementing a gadget op. Reviewed-by: Thinh Nguyen Signed-off-by: Elson Roy Serrao Link: https://lore.kernel.org/r/1679694482-16430-2-git-send-email-quic_eserrao@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/composite.c | 18 ++++++++++++++++++ drivers/usb/gadget/configfs.c | 3 +++ drivers/usb/gadget/udc/core.c | 27 +++++++++++++++++++++++++++ drivers/usb/gadget/udc/trace.h | 5 +++++ include/linux/usb/composite.h | 2 ++ include/linux/usb/gadget.h | 8 ++++++++ 6 files changed, 63 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index cb0a4e2cdbb73..247cca46cdfae 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, return min(val, 900U) / 8; } +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c) +{ + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) { + /* Reset the rw bit if gadget is not capable of it */ + if (!g->wakeup_capable && g->ops->set_remote_wakeup) { + WARN(c->cdev, "Clearing wakeup bit for config c.%d\n", + c->bConfigurationValue); + c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP; + } + } +} + static int config_buf(struct usb_configuration *config, enum usb_device_speed speed, void *buf, u8 type) { @@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev, power = min(power, 500U); else power = min(power, 900U); + + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) + usb_gadget_set_remote_wakeup(gadget, 1); + else + usb_gadget_set_remote_wakeup(gadget, 0); done: if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) usb_gadget_set_selfpowered(gadget); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 4dcf29577f8f1..b94aec6227c51 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1376,6 +1376,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget, if (gadget_is_otg(gadget)) c->descriptors = otg_desc; + /* Properly configure the bmAttributes wakeup bit */ + check_remote_wakeup_config(gadget, c); + cfg = container_of(c, struct config_usb_cfg, c); if (!list_empty(&cfg->string_list)) { i = 0; diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index c40f2ecbe1b8c..0edd9e53fc5a1 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -525,6 +525,33 @@ out: } EXPORT_SYMBOL_GPL(usb_gadget_wakeup); +/** + * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. + * @gadget:the device being configured for remote wakeup + * @set:value to be configured. + * + * set to one to enable remote wakeup feature and zero to disable it. + * + * returns zero on success, else negative errno. + */ +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ + int ret = 0; + + if (!gadget->ops->set_remote_wakeup) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = gadget->ops->set_remote_wakeup(gadget, set); + +out: + trace_usb_gadget_set_remote_wakeup(gadget, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); + /** * usb_gadget_set_selfpowered - sets the device selfpowered feature. * @gadget:the device being declared as self-powered diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h index abdbcb1bacb0b..a5ed26fbc2dad 100644 --- a/drivers/usb/gadget/udc/trace.h +++ b/drivers/usb/gadget/udc/trace.h @@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_ARGS(g, ret) ); +DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, + TP_PROTO(struct usb_gadget *g, int ret), + TP_ARGS(g, ret) +); + DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 43ac3fa760dbe..9783b9107d76b 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite, extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, struct usb_ep *ep0); void composite_dev_cleanup(struct usb_composite_dev *cdev); +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c); static inline struct usb_composite_driver *to_cdriver( struct usb_gadget_driver *gdrv) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dc3092cea99e9..5bec668b41dcd 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -309,6 +309,7 @@ struct usb_udc; struct usb_gadget_ops { int (*get_frame)(struct usb_gadget *); int (*wakeup)(struct usb_gadget *); + int (*set_remote_wakeup)(struct usb_gadget *, int set); int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); int (*vbus_session) (struct usb_gadget *, int is_active); int (*vbus_draw) (struct usb_gadget *, unsigned mA); @@ -383,6 +384,8 @@ struct usb_gadget_ops { * @connected: True if gadget is connected. * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag * indicates that it supports LPM as per the LPM ECN & errata. + * @wakeup_capable: True if gadget is capable of sending remote wakeup. + * @wakeup_armed: True if gadget is armed by the host for remote wakeup. * @irq: the interrupt number for device controller. * @id_number: a unique ID number for ensuring that gadget names are distinct * @@ -444,6 +447,8 @@ struct usb_gadget { unsigned deactivated:1; unsigned connected:1; unsigned lpm_capable:1; + unsigned wakeup_capable:1; + unsigned wakeup_armed:1; int irq; int id_number; }; @@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g) #if IS_ENABLED(CONFIG_USB_GADGET) int usb_gadget_frame_number(struct usb_gadget *gadget); int usb_gadget_wakeup(struct usb_gadget *gadget); +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); int usb_gadget_set_selfpowered(struct usb_gadget *gadget); int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); int usb_gadget_vbus_connect(struct usb_gadget *gadget); @@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_wakeup(struct usb_gadget *gadget) { return 0; } +static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ return 0; } static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) -- GitLab From f8faa536370ec9db460bac96460e16801f62325e Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Thu, 13 Apr 2023 23:57:42 -0700 Subject: [PATCH 0486/2290] Input: xpad - add constants for GIP interface numbers [ Upstream commit f9b2e603c6216824e34dc9a67205d98ccc9a41ca ] Wired GIP devices present multiple interfaces with the same USB identification other than the interface number. This adds constants for differentiating two of them and uses them where appropriate Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20230411031650.960322-2-vi@endrift.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 02f3bc4e4895e..13c36f51b9353 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -564,6 +564,9 @@ struct xboxone_init_packet { #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) +#define GIP_WIRED_INTF_DATA 0 +#define GIP_WIRED_INTF_AUDIO 1 + /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). @@ -2008,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id } if (xpad->xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { + intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by -- GitLab From 8745f3592ee4a7b49ede16ddd3f12a41ecaa23c9 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 31 Mar 2023 11:31:23 +0800 Subject: [PATCH 0487/2290] iommu/sprd: Release dma buffer to avoid memory leak [ Upstream commit 9afea57384d4ae7b2034593eac7fa76c7122762a ] When attaching to a domain, the driver would alloc a DMA buffer which is used to store address mapping table, and it need to be released when the IOMMU domain is freed. Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20230331033124.864691-2-zhang.lyra@gmail.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/sprd-iommu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 8261066de07d7..e4358393fe378 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) return &dom->domain; } -static void sprd_iommu_domain_free(struct iommu_domain *domain) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - - kfree(dom); -} - static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) { struct sprd_iommu_device *sdev = dom->sdev; @@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); } +static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) +{ + size_t pgt_size; + + /* Nothing need to do if the domain hasn't been attached */ + if (!dom->sdev) + return; + + pgt_size = sprd_iommu_pgt_size(&dom->domain); + dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + dom->sdev = NULL; + sprd_iommu_hw_en(dom->sdev, false); +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + sprd_iommu_cleanup(dom); + kfree(dom); +} + static int sprd_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { -- GitLab From e89c84422f35ce9fcb0fe9e3f3f60506586a7bae Mon Sep 17 00:00:00 2001 From: Tomas Krcka Date: Wed, 29 Mar 2023 12:34:19 +0000 Subject: [PATCH 0488/2290] iommu/arm-smmu-v3: Acknowledge pri/event queue overflow if any [ Upstream commit 67ea0b7ce41844eae7c10bb04dfe66a23318c224 ] When an overflow occurs in the PRI queue, the SMMU toggles the overflow flag in the PROD register. To exit the overflow condition, the PRI thread is supposed to acknowledge it by toggling this flag in the CONS register. Unacknowledged overflow causes the queue to stop adding anything new. Currently, the priq thread always writes the CONS register back to the SMMU after clearing the queue. The writeback is not necessary if the OVFLG in the PROD register has not been changed, no overflow has occured. This commit checks the difference of the overflow flag between CONS and PROD register. If it's different, toggles the OVACKFLG flag in the CONS register and write it to the SMMU. The situation is similar for the event queue. The acknowledge register is also toggled after clearing the event queue but never propagated to the hardware. This would only be done the next time when executing evtq thread. Unacknowledged event queue overflow doesn't affect the event queue, because the SMMU still adds elements to that queue when the overflow condition is active. But it feel nicer to keep SMMU in sync when possible, so use the same way here as well. Signed-off-by: Tomas Krcka Link: https://lore.kernel.org/r/20230329123420.34641-1-tomas.krcka@gmail.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8966f7d5aab61..82f100e591b5a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q) q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); } +static void queue_sync_cons_ovf(struct arm_smmu_queue *q) +{ + struct arm_smmu_ll_queue *llq = &q->llq; + + if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons))) + return; + + llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | + Q_IDX(llq, llq->cons); + queue_sync_cons_out(q); +} + static int queue_sync_prod_in(struct arm_smmu_queue *q) { u32 prod; @@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } @@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); - queue_sync_cons_out(q); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } -- GitLab From 39c6312009574ca73865354133ca222e7753a71b Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 11 Jan 2023 16:59:43 +0800 Subject: [PATCH 0489/2290] fs/ntfs3: Fix a possible null-pointer dereference in ni_clear() [ Upstream commit ec275bf9693d19cc0fdce8436f4c425ced86f6e7 ] In a previous commit c1006bd13146, ni->mi.mrec in ni_write_inode() could be NULL, and thus a NULL check is added for this variable. However, in the same call stack, ni->mi.mrec can be also dereferenced in ni_clear(): ntfs_evict_inode(inode) ni_write_inode(inode, ...) ni = ntfs_i(inode); is_rec_inuse(ni->mi.mrec) -> Add a NULL check by previous commit ni_clear(ntfs_i(inode)) is_rec_inuse(ni->mi.mrec) -> No check Thus, a possible null-pointer dereference may exist in ni_clear(). To fix it, a NULL check is added in this function. Signed-off-by: Jia-Ju Bai Reported-by: TOTE Robot Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/frecord.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index bb7e33c240737..1f0e230ec9e2c 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni) { struct rb_node *node; - if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec)) + if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec)) ni_delete_all(ni); al_destroy(ni); -- GitLab From 976126f2def45f4075f18372bc4e97bb5da3757a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2023 09:59:10 +0100 Subject: [PATCH 0490/2290] clk: tegra20: fix gcc-7 constant overflow warning [ Upstream commit b4a2adbf3586efa12fe78b9dec047423e01f3010 ] Older gcc versions get confused by comparing a u32 value to a negative constant in a switch()/case block: drivers/clk/tegra/clk-tegra20.c: In function 'tegra20_clk_measure_input_freq': drivers/clk/tegra/clk-tegra20.c:581:2: error: case label does not reduce to an integer constant case OSC_CTRL_OSC_FREQ_12MHZ: ^~~~ drivers/clk/tegra/clk-tegra20.c:593:2: error: case label does not reduce to an integer constant case OSC_CTRL_OSC_FREQ_26MHZ: Make the constants unsigned instead. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230227085914.2560984-1-arnd@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra20.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 422d782475532..dcacc5064d339 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -21,24 +21,24 @@ #define MISC_CLK_ENB 0x48 #define OSC_CTRL 0x50 -#define OSC_CTRL_OSC_FREQ_MASK (3<<30) -#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30) -#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30) -#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30) -#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30) -#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK) - -#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28) -#define OSC_CTRL_PLL_REF_DIV_1 (0<<28) -#define OSC_CTRL_PLL_REF_DIV_2 (1<<28) -#define OSC_CTRL_PLL_REF_DIV_4 (2<<28) +#define OSC_CTRL_OSC_FREQ_MASK (3u<<30) +#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30) +#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30) +#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30) +#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30) +#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK) + +#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28) +#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28) +#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28) +#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28) #define OSC_FREQ_DET 0x58 -#define OSC_FREQ_DET_TRIG (1<<31) +#define OSC_FREQ_DET_TRIG (1u<<31) #define OSC_FREQ_DET_STATUS 0x5c -#define OSC_FREQ_DET_BUSY (1<<31) -#define OSC_FREQ_DET_CNT_MASK 0xFFFF +#define OSC_FREQ_DET_BUSYu (1<<31) +#define OSC_FREQ_DET_CNT_MASK 0xFFFFu #define TEGRA20_CLK_PERIPH_BANKS 3 -- GitLab From 0d04e45c65f0785e558b93d2631d58680f263e10 Mon Sep 17 00:00:00 2001 From: Edward Lo Date: Tue, 4 Oct 2022 23:15:06 +0800 Subject: [PATCH 0491/2290] fs/ntfs3: Add length check in indx_get_root [ Upstream commit 08e8cf5f2d9ec383a2e339a2711b62a54ff3fba0 ] This adds a length check to guarantee the retrieved index root is legit. [ 162.459513] BUG: KASAN: use-after-free in hdr_find_e.isra.0+0x10c/0x320 [ 162.460176] Read of size 2 at addr ffff8880037bca99 by task mount/243 [ 162.460851] [ 162.461252] CPU: 0 PID: 243 Comm: mount Not tainted 6.0.0-rc7 #42 [ 162.461744] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 162.462609] Call Trace: [ 162.462954] [ 162.463276] dump_stack_lvl+0x49/0x63 [ 162.463822] print_report.cold+0xf5/0x689 [ 162.464608] ? unwind_get_return_address+0x3a/0x60 [ 162.465766] ? hdr_find_e.isra.0+0x10c/0x320 [ 162.466975] kasan_report+0xa7/0x130 [ 162.467506] ? _raw_spin_lock_irq+0xc0/0xf0 [ 162.467998] ? hdr_find_e.isra.0+0x10c/0x320 [ 162.468536] __asan_load2+0x68/0x90 [ 162.468923] hdr_find_e.isra.0+0x10c/0x320 [ 162.469282] ? cmp_uints+0xe0/0xe0 [ 162.469557] ? cmp_sdh+0x90/0x90 [ 162.469864] ? ni_find_attr+0x214/0x300 [ 162.470217] ? ni_load_mi+0x80/0x80 [ 162.470479] ? entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 162.470931] ? ntfs_bread_run+0x190/0x190 [ 162.471307] ? indx_get_root+0xe4/0x190 [ 162.471556] ? indx_get_root+0x140/0x190 [ 162.471833] ? indx_init+0x1e0/0x1e0 [ 162.472069] ? fnd_clear+0x115/0x140 [ 162.472363] ? _raw_spin_lock_irqsave+0x100/0x100 [ 162.472731] indx_find+0x184/0x470 [ 162.473461] ? sysvec_apic_timer_interrupt+0x57/0xc0 [ 162.474429] ? indx_find_buffer+0x2d0/0x2d0 [ 162.474704] ? do_syscall_64+0x3b/0x90 [ 162.474962] dir_search_u+0x196/0x2f0 [ 162.475381] ? ntfs_nls_to_utf16+0x450/0x450 [ 162.475661] ? ntfs_security_init+0x3d6/0x440 [ 162.475906] ? is_sd_valid+0x180/0x180 [ 162.476191] ntfs_extend_init+0x13f/0x2c0 [ 162.476496] ? ntfs_fix_post_read+0x130/0x130 [ 162.476861] ? iput.part.0+0x286/0x320 [ 162.477325] ntfs_fill_super+0x11e0/0x1b50 [ 162.477709] ? put_ntfs+0x1d0/0x1d0 [ 162.477970] ? vsprintf+0x20/0x20 [ 162.478258] ? set_blocksize+0x95/0x150 [ 162.478538] get_tree_bdev+0x232/0x370 [ 162.478789] ? put_ntfs+0x1d0/0x1d0 [ 162.479038] ntfs_fs_get_tree+0x15/0x20 [ 162.479374] vfs_get_tree+0x4c/0x130 [ 162.479729] path_mount+0x654/0xfe0 [ 162.480124] ? putname+0x80/0xa0 [ 162.480484] ? finish_automount+0x2e0/0x2e0 [ 162.480894] ? putname+0x80/0xa0 [ 162.481467] ? kmem_cache_free+0x1c4/0x440 [ 162.482280] ? putname+0x80/0xa0 [ 162.482714] do_mount+0xd6/0xf0 [ 162.483264] ? path_mount+0xfe0/0xfe0 [ 162.484782] ? __kasan_check_write+0x14/0x20 [ 162.485593] __x64_sys_mount+0xca/0x110 [ 162.486024] do_syscall_64+0x3b/0x90 [ 162.486543] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 162.487141] RIP: 0033:0x7f9d374e948a [ 162.488324] Code: 48 8b 0d 11 fa 2a 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 008 [ 162.489728] RSP: 002b:00007ffe30e73d18 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 162.490971] RAX: ffffffffffffffda RBX: 0000561cdb43a060 RCX: 00007f9d374e948a [ 162.491669] RDX: 0000561cdb43a260 RSI: 0000561cdb43a2e0 RDI: 0000561cdb442af0 [ 162.492050] RBP: 0000000000000000 R08: 0000561cdb43a280 R09: 0000000000000020 [ 162.492459] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000561cdb442af0 [ 162.493183] R13: 0000561cdb43a260 R14: 0000000000000000 R15: 00000000ffffffff [ 162.493644] [ 162.493908] [ 162.494214] The buggy address belongs to the physical page: [ 162.494761] page:000000003e38a3d5 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x37bc [ 162.496064] flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) [ 162.497278] raw: 000fffffc0000000 ffffea00000df1c8 ffffea00000df008 0000000000000000 [ 162.498928] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 [ 162.500542] page dumped because: kasan: bad access detected [ 162.501057] [ 162.501242] Memory state around the buggy address: [ 162.502230] ffff8880037bc980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.502977] ffff8880037bca00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.503522] >ffff8880037bca80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.503963] ^ [ 162.504370] ffff8880037bcb00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 162.504766] ffff8880037bcb80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Signed-off-by: Edward Lo Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/index.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 7371f7855e4c4..eee01db6e0cc5 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *a; const struct INDEX_NAMES *in = &s_index_names[indx->type]; + struct INDEX_ROOT *root = NULL; a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, mi); @@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, if (attr) *attr = a; - return resident_data_ex(a, sizeof(struct INDEX_ROOT)); + root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); + + /* length check */ + if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > + le32_to_cpu(a->res.data_size)) { + return NULL; + } + + return root; } static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, -- GitLab From b3152afc0eb864f7c6ecad134a15b577ef7aec77 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Oct 2022 12:32:51 +0530 Subject: [PATCH 0492/2290] fs/ntfs3: Fix NULL dereference in ni_write_inode [ Upstream commit 8dae4f6341e335a09575be60b4fdf697c732a470 ] Syzbot reports a NULL dereference in ni_write_inode. When creating a new inode, if allocation fails in mi_init function (called in mi_format_new function), mi->mrec is set to NULL. In the error path of this inode creation, mi->mrec is later dereferenced in ni_write_inode. Add a NULL check to prevent NULL dereference. Link: https://syzkaller.appspot.com/bug?extid=f45957555ed4a808cc7a Reported-and-tested-by: syzbot+f45957555ed4a808cc7a@syzkaller.appspotmail.com Signed-off-by: Abdun Nihaal Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/frecord.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 1f0e230ec9e2c..d260260900241 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) return 0; } + if (!ni->mi.mrec) + goto out; + if (is_rec_inuse(ni->mi.mrec) && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { bool modified = false; -- GitLab From 329fc4d3f73d865b25f2ee4eafafb040ace37ad5 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 17 Nov 2022 17:19:12 +0800 Subject: [PATCH 0493/2290] fs/ntfs3: Fix NULL pointer dereference in 'ni_write_inode' [ Upstream commit db2a3cc6a3481076da6344cc62a80a4e2525f36f ] Syzbot found the following issue: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000016 Mem abort info: ESR = 0x0000000096000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000010af56000 [0000000000000016] pgd=08000001090da003, p4d=08000001090da003, pud=08000001090ce003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 3036 Comm: syz-executor206 Not tainted 6.0.0-rc6-syzkaller-17739-g16c9f284e746 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : is_rec_inuse fs/ntfs3/ntfs.h:313 [inline] pc : ni_write_inode+0xac/0x798 fs/ntfs3/frecord.c:3232 lr : ni_write_inode+0xa0/0x798 fs/ntfs3/frecord.c:3226 sp : ffff8000126c3800 x29: ffff8000126c3860 x28: 0000000000000000 x27: ffff0000c8b02000 x26: ffff0000c7502320 x25: ffff0000c7502288 x24: 0000000000000000 x23: ffff80000cbec91c x22: ffff0000c8b03000 x21: ffff0000c8b02000 x20: 0000000000000001 x19: ffff0000c75024d8 x18: 00000000000000c0 x17: ffff80000dd1b198 x16: ffff80000db59158 x15: ffff0000c4b6b500 x14: 00000000000000b8 x13: 0000000000000000 x12: ffff0000c4b6b500 x11: ff80800008be1b60 x10: 0000000000000000 x9 : ffff0000c4b6b500 x8 : 0000000000000000 x7 : ffff800008be1b50 x6 : 0000000000000000 x5 : 0000000000000000 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000008 x1 : 0000000000000001 x0 : 0000000000000000 Call trace: is_rec_inuse fs/ntfs3/ntfs.h:313 [inline] ni_write_inode+0xac/0x798 fs/ntfs3/frecord.c:3232 ntfs_evict_inode+0x54/0x84 fs/ntfs3/inode.c:1744 evict+0xec/0x334 fs/inode.c:665 iput_final fs/inode.c:1748 [inline] iput+0x2c4/0x324 fs/inode.c:1774 ntfs_new_inode+0x7c/0xe0 fs/ntfs3/fsntfs.c:1660 ntfs_create_inode+0x20c/0xe78 fs/ntfs3/inode.c:1278 ntfs_create+0x54/0x74 fs/ntfs3/namei.c:100 lookup_open fs/namei.c:3413 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x804/0x11c4 fs/namei.c:3688 do_filp_open+0xdc/0x1b8 fs/namei.c:3718 do_sys_openat2+0xb8/0x22c fs/open.c:1311 do_sys_open fs/open.c:1327 [inline] __do_sys_openat fs/open.c:1343 [inline] __se_sys_openat fs/open.c:1338 [inline] __arm64_sys_openat+0xb0/0xe0 fs/open.c:1338 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 Code: 97dafee4 340001b4 f9401328 2a1f03e0 (79402d14) ---[ end trace 0000000000000000 ]--- Above issue may happens as follows: ntfs_new_inode mi_init mi->mrec = kmalloc(sbi->record_size, GFP_NOFS); -->failed to allocate memory if (!mi->mrec) return -ENOMEM; iput iput_final evict ntfs_evict_inode ni_write_inode is_rec_inuse(ni->mi.mrec)-> As 'ni->mi.mrec' is NULL trigger NULL-ptr-deref To solve above issue if new inode failed make inode bad before call 'iput()' in 'ntfs_new_inode()'. Reported-by: syzbot+f45957555ed4a808cc7a@syzkaller.appspotmail.com Signed-off-by: Ye Bin Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fsntfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 1eac80d55b554..4c2d079b3d49b 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1674,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) out: if (err) { + make_bad_inode(inode); iput(inode); ni = ERR_PTR(err); } -- GitLab From e5f488993bc1893b84d93e9915155fab66a070d2 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 27 Mar 2023 13:30:29 +0530 Subject: [PATCH 0494/2290] iommu/arm-smmu-qcom: Limit the SMR groups to 128 [ Upstream commit 12261134732689b7e30c59db9978f81230965181 ] Some platforms support more than 128 stream matching groups than what is defined by the ARM SMMU architecture specification. But due to some unknown reasons, those additional groups don't exhibit the same behavior as the architecture supported ones. For instance, the additional groups will not detect the quirky behavior of some firmware versions intercepting writes to S2CR register, thus skipping the quirk implemented in the driver and causing boot crash. So let's limit the groups to 128 for now until the issue with those groups are fixed and issue a notice to users in that case. Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20230327080029.11584-1-manivannan.sadhasivam@linaro.org [will: Reworded the comment slightly] Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index d80065c8105af..f15dcb9e4175c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -267,12 +267,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) { - unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + unsigned int last_s2cr; u32 reg; u32 smr; int i; + /* + * Some platforms support more than the Arm SMMU architected maximum of + * 128 stream matching groups. For unknown reasons, the additional + * groups don't exhibit the same behavior as the architected registers, + * so limit the groups to 128 until the behavior is fixed for the other + * groups. + */ + if (smmu->num_mapping_groups > 128) { + dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n"); + smmu->num_mapping_groups = 128; + } + + last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + /* * With some firmware versions writes to S2CR of type FAULT are * ignored, and writing BYPASS will end up written as FAULT in the -- GitLab From afbf1a5cef46427241e76704991cc83c9b1a463b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 21 Mar 2023 17:47:03 -0600 Subject: [PATCH 0495/2290] RDMA/core: Fix multiple -Warray-bounds warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aa4d540b4150052ae3b36d286b9c833a961ce291 ] GCC-13 (and Clang)[1] does not like to access a partially allocated object, since it cannot reason about it for bounds checking. In this case 140 bytes are allocated for an object of type struct ib_umad_packet: packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); However, notice that sizeof(*packet) is only 104 bytes: struct ib_umad_packet { struct ib_mad_send_buf * msg; /* 0 8 */ struct ib_mad_recv_wc * recv_wc; /* 8 8 */ struct list_head list; /* 16 16 */ int length; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct ib_user_mad mad __attribute__((__aligned__(8))); /* 40 64 */ /* size: 104, cachelines: 2, members: 5 */ /* sum members: 100, holes: 1, sum holes: 4 */ /* forced alignments: 1, forced holes: 1, sum forced holes: 4 */ /* last cacheline: 40 bytes */ } __attribute__((__aligned__(8))); and 36 bytes extra bytes are allocated for a flexible-array member in struct ib_user_mad: include/rdma/ib_mad.h: 120 enum { ... 123 IB_MGMT_RMPP_HDR = 36, ... } struct ib_user_mad { struct ib_user_mad_hdr hdr; /* 0 64 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u64 data[] __attribute__((__aligned__(8))); /* 64 0 */ /* size: 64, cachelines: 1, members: 2 */ /* forced alignments: 1 */ } __attribute__((__aligned__(8))); So we have sizeof(*packet) + IB_MGMT_RMPP_HDR == 140 bytes Then the address of the flex-array member (for which only 36 bytes were allocated) is casted and copied into a pointer to struct ib_rmpp_mad, which, in turn, is of size 256 bytes: rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; /* 0 24 */ struct ib_rmpp_hdr rmpp_hdr; /* 24 12 */ u8 data[220]; /* 36 220 */ /* size: 256, cachelines: 4, members: 3 */ }; The thing is that those 36 bytes allocated for flex-array member data in struct ib_user_mad onlly account for the size of both struct ib_mad_hdr and struct ib_rmpp_hdr, but nothing is left for array u8 data[220]. So, the compiler is legitimately complaining about accessing an object for which not enough memory was allocated. Apparently, the only members of struct ib_rmpp_mad that are relevant (that are actually being used) in function ib_umad_write() are mad_hdr and rmpp_hdr. So, instead of casting packet->mad.data to (struct ib_rmpp_mad *) create a new structure struct ib_rmpp_mad_hdr { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; } __packed; and cast packet->mad.data to (struct ib_rmpp_mad_hdr *). Notice that IB_MGMT_RMPP_HDR == sizeof(struct ib_rmpp_mad_hdr) == 36 bytes Refactor the rest of the code, accordingly. Fix the following warnings seen under GCC-13 and -Warray-bounds: drivers/infiniband/core/user_mad.c:564:50: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] drivers/infiniband/core/user_mad.c:566:42: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] drivers/infiniband/core/user_mad.c:618:25: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] drivers/infiniband/core/user_mad.c:622:44: warning: array subscript ‘struct ib_rmpp_mad[0]’ is partly outside array bounds of ‘unsigned char[140]’ [-Warray-bounds=] Link: https://github.com/KSPP/linux/issues/273 Link: https://godbolt.org/z/oYWaGM4Yb [1] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/ZBpB91qQcB10m3Fw@work Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/user_mad.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d96c78e436f98..5c284dfbe6923 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -131,6 +131,11 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +struct ib_rmpp_mad_hdr { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; +} __packed; + #define CREATE_TRACE_POINTS #include @@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; + struct ib_rmpp_mad_hdr *rmpp_mad_hdr; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct rdma_ah_attr ah_attr; struct ib_ah *ah; - struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; u8 base_version; @@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; - packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; @@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_up; } - rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data; + hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class); - if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } else { copy_offset = IB_MGMT_MAD_HDR; @@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); - rmpp_mad->mad_hdr.tid = *tid; + rmpp_mad_hdr->mad_hdr.tid = *tid; } if (!ib_mad_kernel_rmpp_agent(agent) - && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) - && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); -- GitLab From 87632bc9ecff5ded93433bc0fca428019bdd1cfe Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 18 Jan 2024 10:05:05 -0800 Subject: [PATCH 0496/2290] mm: huge_memory: don't force huge page alignment on 32 bit commit 4ef9ad19e17676b9ef071309bc62020e2373705d upstream. commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused two issues [1] [2] reported on 32 bit system or compat userspace. It doesn't make too much sense to force huge page alignment on 32 bit system due to the constrained virtual address space. [1] https://lore.kernel.org/linux-mm/d0a136a0-4a31-46bc-adf4-2db109a61672@kernel.org/ [2] https://lore.kernel.org/linux-mm/CAJuCfpHXLdQy1a2B6xN2d7quTYwg2OoZseYPZTRpU0eHHKD-sQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240118180505.2914778-1-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: Jiri Slaby Reported-by: Suren Baghdasaryan Tested-by: Jiri Slaby Tested-by: Suren Baghdasaryan Reviewed-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Christopher Lameter Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 59577946735b1..9736e762184bd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; -- GitLab From 65a389ef979b5ca96bc08aa165d6710fe8f1e890 Mon Sep 17 00:00:00 2001 From: Han Xu Date: Wed, 8 Nov 2023 09:07:01 -0600 Subject: [PATCH 0497/2290] mtd: spinand: gigadevice: Fix the get ecc status issue [ Upstream commit 59950610c0c00c7a06d8a75d2ee5d73dba4274cf ] Some GigaDevice ecc_get_status functions use on-stack buffer for spi_mem_op causes spi_mem_check_op failing, fix the issue by using spinand scratchbuf. Fixes: c40c7a990a46 ("mtd: spinand: Add support for GigaDevice GD5F1GQ4UExxG") Signed-off-by: Han Xu Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231108150701.593912-1-han.xu@nxp.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/spi/gigadevice.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 6b043e24855fb..9116ee7f023ed 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, * report the maximum of 4 in this case */ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status & STATUS_ECC_MASK) >> 2) | ((status2 & STATUS_ECC_MASK) >> 4); @@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, * 1 ... 4 bits are flipped (and corrected) */ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status2 & STATUS_ECC_MASK) >> 4) + 1; case STATUS_ECC_UNCOR_ERROR: -- GitLab From 0b27bf4c494d61e5663baa34c3edd7ccebf0ea44 Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 21 Feb 2024 16:40:48 +0900 Subject: [PATCH 0498/2290] netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter [ Upstream commit 661779e1fcafe1b74b3f3fe8e980c1e207fea1fd ] syzbot reported the following uninit-value access issue [1]: netlink_to_full_skb() creates a new `skb` and puts the `skb->data` passed as a 1st arg of netlink_to_full_skb() onto new `skb`. The data size is specified as `len` and passed to skb_put_data(). This `len` is based on `skb->end` that is not data offset but buffer offset. The `skb->end` contains data and tailroom. Since the tailroom is not initialized when the new `skb` created, KMSAN detects uninitialized memory area when copying the data. This patch resolved this issue by correct the len from `skb->end` to `skb->len`, which is the actual data offset. BUG: KMSAN: kernel-infoleak-after-free in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak-after-free in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak-after-free in _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 copy_to_iter include/linux/uio.h:197 [inline] simple_copy_to_iter+0x68/0xa0 net/core/datagram.c:532 __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:420 skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] packet_recvmsg+0xd9c/0x2000 net/packet/af_packet.c:3482 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg net/socket.c:1066 [inline] sock_read_iter+0x467/0x580 net/socket.c:1136 call_read_iter include/linux/fs.h:2014 [inline] new_sync_read fs/read_write.c:389 [inline] vfs_read+0x8f6/0xe00 fs/read_write.c:470 ksys_read+0x20f/0x4c0 fs/read_write.c:613 __do_sys_read fs/read_write.c:623 [inline] __se_sys_read fs/read_write.c:621 [inline] __x64_sys_read+0x93/0xd0 fs/read_write.c:621 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was stored to memory at: skb_put_data include/linux/skbuff.h:2622 [inline] netlink_to_full_skb net/netlink/af_netlink.c:181 [inline] __netlink_deliver_tap_skb net/netlink/af_netlink.c:298 [inline] __netlink_deliver_tap+0x5be/0xc90 net/netlink/af_netlink.c:325 netlink_deliver_tap net/netlink/af_netlink.c:338 [inline] netlink_deliver_tap_kernel net/netlink/af_netlink.c:347 [inline] netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x10f1/0x1250 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: free_pages_prepare mm/page_alloc.c:1087 [inline] free_unref_page_prepare+0xb0/0xa40 mm/page_alloc.c:2347 free_unref_page_list+0xeb/0x1100 mm/page_alloc.c:2533 release_pages+0x23d3/0x2410 mm/swap.c:1042 free_pages_and_swap_cache+0xd9/0xf0 mm/swap_state.c:316 tlb_batch_pages_flush mm/mmu_gather.c:98 [inline] tlb_flush_mmu_free mm/mmu_gather.c:293 [inline] tlb_flush_mmu+0x6f5/0x980 mm/mmu_gather.c:300 tlb_finish_mmu+0x101/0x260 mm/mmu_gather.c:392 exit_mmap+0x49e/0xd30 mm/mmap.c:3321 __mmput+0x13f/0x530 kernel/fork.c:1349 mmput+0x8a/0xa0 kernel/fork.c:1371 exit_mm+0x1b8/0x360 kernel/exit.c:567 do_exit+0xd57/0x4080 kernel/exit.c:858 do_group_exit+0x2fd/0x390 kernel/exit.c:1021 __do_sys_exit_group kernel/exit.c:1032 [inline] __se_sys_exit_group kernel/exit.c:1030 [inline] __x64_sys_exit_group+0x3c/0x50 kernel/exit.c:1030 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Bytes 3852-3903 of 3904 are uninitialized Memory access of size 3904 starts at ffff88812ea1e000 Data copied to user address 0000000020003280 CPU: 1 PID: 5043 Comm: syz-executor297 Not tainted 6.7.0-rc5-syzkaller-00047-g5bd7ef53ffe5 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 Fixes: 1853c9496460 ("netlink, mmap: transform mmap skb into full skb on taps") Reported-and-tested-by: syzbot+34ad5fab48f7bf510349@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=34ad5fab48f7bf510349 [1] Signed-off-by: Ryosuke Yasuoka Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240221074053.1794118-1-ryasuoka@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6857a4965fe87..e9b81cba1e2b4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); -- GitLab From 0ac219c4c3ab253f3981f346903458d20bacab32 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Feb 2024 18:27:33 +0100 Subject: [PATCH 0499/2290] netlink: add nla be16/32 types to minlen array [ Upstream commit 9a0d18853c280f6a0ee99f91619f2442a17a323a ] BUG: KMSAN: uninit-value in nla_validate_range_unsigned lib/nlattr.c:222 [inline] BUG: KMSAN: uninit-value in nla_validate_int_range lib/nlattr.c:336 [inline] BUG: KMSAN: uninit-value in validate_nla lib/nlattr.c:575 [inline] BUG: KMSAN: uninit-value in __nla_validate_parse+0x2e20/0x45c0 lib/nlattr.c:631 nla_validate_range_unsigned lib/nlattr.c:222 [inline] nla_validate_int_range lib/nlattr.c:336 [inline] validate_nla lib/nlattr.c:575 [inline] ... The message in question matches this policy: [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), but because NLA_BE32 size in minlen array is 0, the validation code will read past the malformed (too small) attribute. Note: Other attributes, e.g. BITFIELD32, SINT, UINT.. are also missing: those likely should be added too. Reported-by: syzbot+3f497b07aa3baf2fb4d0@syzkaller.appspotmail.com Reported-by: xingwei lee Closes: https://lore.kernel.org/all/CABOYnLzFYHSnvTyS6zGa-udNX55+izqkOt2sB9WDqUcEGW6n8w@mail.gmail.com/raw Fixes: ecaf75ffd5f5 ("netlink: introduce bigendian integer types") Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20240221172740.5092-1-fw@strlen.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- lib/nlattr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/nlattr.c b/lib/nlattr.c index dffd60e4065fd..86344df0ccf7b 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { @@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; /* -- GitLab From ab63de24ebea36fe73ac7121738595d704b66d96 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Feb 2024 14:56:02 +0100 Subject: [PATCH 0500/2290] net: ip_tunnel: prevent perpetual headroom growth [ Upstream commit 5ae1e9922bbdbaeb9cfbe91085ab75927488ac0f ] syzkaller triggered following kasan splat: BUG: KASAN: use-after-free in __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170 Read of size 1 at addr ffff88812fb4000e by task syz-executor183/5191 [..] kasan_report+0xda/0x110 mm/kasan/report.c:588 __skb_flow_dissect+0x19d1/0x7a50 net/core/flow_dissector.c:1170 skb_flow_dissect_flow_keys include/linux/skbuff.h:1514 [inline] ___skb_get_hash net/core/flow_dissector.c:1791 [inline] __skb_get_hash+0xc7/0x540 net/core/flow_dissector.c:1856 skb_get_hash include/linux/skbuff.h:1556 [inline] ip_tunnel_xmit+0x1855/0x33c0 net/ipv4/ip_tunnel.c:748 ipip_tunnel_xmit+0x3cc/0x4e0 net/ipv4/ipip.c:308 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564 __dev_queue_xmit+0x7c1/0x3d60 net/core/dev.c:4349 dev_queue_xmit include/linux/netdevice.h:3134 [inline] neigh_connected_output+0x42c/0x5d0 net/core/neighbour.c:1592 ... ip_finish_output2+0x833/0x2550 net/ipv4/ip_output.c:235 ip_finish_output+0x31/0x310 net/ipv4/ip_output.c:323 .. iptunnel_xmit+0x5b4/0x9b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1dbc/0x33c0 net/ipv4/ip_tunnel.c:831 ipgre_xmit+0x4a1/0x980 net/ipv4/ip_gre.c:665 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3564 ... The splat occurs because skb->data points past skb->head allocated area. This is because neigh layer does: __skb_pull(skb, skb_network_offset(skb)); ... but skb_network_offset() returns a negative offset and __skb_pull() arg is unsigned. IOW, we skb->data gets "adjusted" by a huge value. The negative value is returned because skb->head and skb->data distance is more than 64k and skb->network_header (u16) has wrapped around. The bug is in the ip_tunnel infrastructure, which can cause dev->needed_headroom to increment ad infinitum. The syzkaller reproducer consists of packets getting routed via a gre tunnel, and route of gre encapsulated packets pointing at another (ipip) tunnel. The ipip encapsulation finds gre0 as next output device. This results in the following pattern: 1). First packet is to be sent out via gre0. Route lookup found an output device, ipip0. 2). ip_tunnel_xmit for gre0 bumps gre0->needed_headroom based on the future output device, rt.dev->needed_headroom (ipip0). 3). ip output / start_xmit moves skb on to ipip0. which runs the same code path again (xmit recursion). 4). Routing step for the post-gre0-encap packet finds gre0 as output device to use for ipip0 encapsulated packet. tunl0->needed_headroom is then incremented based on the (already bumped) gre0 device headroom. This repeats for every future packet: gre0->needed_headroom gets inflated because previous packets' ipip0 step incremented rt->dev (gre0) headroom, and ipip0 incremented because gre0 needed_headroom was increased. For each subsequent packet, gre/ipip0->needed_headroom grows until post-expand-head reallocations result in a skb->head/data distance of more than 64k. Once that happens, skb->network_header (u16) wraps around when pskb_expand_head tries to make sure that skb_network_offset() is unchanged after the headroom expansion/reallocation. After this skb_network_offset(skb) returns a different (and negative) result post headroom expansion. The next trip to neigh layer (or anything else that would __skb_pull the network header) makes skb->data point to a memory location outside skb->head area. v2: Cap the needed_headroom update to an arbitarily chosen upperlimit to prevent perpetual increase instead of dropping the headroom increment completely. Reported-and-tested-by: syzbot+bfde3bef047a81b8fde6@syzkaller.appspotmail.com Closes: https://groups.google.com/g/syzkaller-bugs/c/fL9G6GtWskY/m/VKk_PR5FBAAJ Fixes: 243aad830e8a ("ip_gre: include route header_len in max_headroom calculation") Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220135606.4939-1-fw@strlen.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/ip_tunnel.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 24961b304dad0..328f9068c6a43 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + static const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { @@ -614,13 +628,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); - - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, headroom)) { ip_rt_put(rt); goto tx_dropped; } + + ip_tunnel_adj_headroom(dev, headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; @@ -800,16 +814,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, max_headroom)) { ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; } + ip_tunnel_adj_headroom(dev, max_headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; -- GitLab From a3c8fa54e904b0ddb52a08cc2d8ac239054f61fd Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 20 Feb 2024 16:10:53 +0800 Subject: [PATCH 0501/2290] net: mctp: take ownership of skb in mctp_local_output [ Upstream commit 3773d65ae5154ed7df404b050fd7387a36ab5ef3 ] Currently, mctp_local_output only takes ownership of skb on success, and we may leak an skb if mctp_local_output fails in specific states; the skb ownership isn't transferred until the actual output routing occurs. Instead, make mctp_local_output free the skb on all error paths up to the route action, so it always consumes the passed skb. Fixes: 833ef3b91de6 ("mctp: Populate socket implementation") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220081053.1439104-1-jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/mctp.h | 1 + net/mctp/route.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/mctp.h b/include/net/mctp.h index 82800d521c3de..7ed84054f4623 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -249,6 +249,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/net/mctp/route.c b/net/mctp/route.c index 256bf0b89e6ca..0144d8ebdaefb 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); if (!dev) { rcu_read_unlock(); - return rc; + goto out_free; } rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rt->mtu = 0; } else { - return -EINVAL; + rc = -EINVAL; + goto out_free; } spin_lock_irqsave(&rt->dev->addrs_lock, flags); @@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + /* route output functions consume the skb, even on error */ + skb = NULL; + out_release: if (!ext_rt) mctp_route_release(rt); mctp_dev_put(tmp_rt.dev); +out_free: + kfree_skb(skb); return rc; } -- GitLab From 29360fd3288f3978ccde2f8f7eba22282c4a08a3 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 20 Feb 2024 11:12:07 +0800 Subject: [PATCH 0502/2290] tun: Fix xdp_rxq_info's queue_index when detaching [ Upstream commit 2a770cdc4382b457ca3d43d03f0f0064f905a0d0 ] When a queue(tfile) is detached, we only update tfile's queue_index, but do not update xdp_rxq_info's queue_index. This patch fixes it. Fixes: 8bf5c4ee1889 ("tun: setup xdp_rxq_info") Signed-off-by: Yunjian Wang Link: https://lore.kernel.org/r/1708398727-46308-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 367255bb44cdc..922d6f16d99d1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); -- GitLab From e85b3c15398f6fa1f3941be8acbef79ae114744d Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sat, 17 Feb 2024 13:30:10 -0800 Subject: [PATCH 0503/2290] cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back [ Upstream commit f0a0fc10abb062d122db5ac4ed42f6d1ca342649 ] There is a loophole in pstate limit clamping for the intel_cpufreq CPU frequency scaling driver (intel_pstate in passive mode), schedutil CPU frequency scaling governor, HWP (HardWare Pstate) control enabled, when the adjust_perf call back path is used. Fix it. Fixes: a365ab6b9dfb cpufreq: intel_pstate: Implement the ->adjust_perf() callback Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index abdd26f7d04c9..5771f3fc6115d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2952,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; -- GitLab From 7985d73961bbb4e726c1be7b9cd26becc7be8325 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Feb 2024 15:12:10 -0800 Subject: [PATCH 0504/2290] net: veth: clear GRO when clearing XDP even when down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fe9f801355f0b47668419f30f1fac1cf4539e736 ] veth sets NETIF_F_GRO automatically when XDP is enabled, because both features use the same NAPI machinery. The logic to clear NETIF_F_GRO sits in veth_disable_xdp() which is called both on ndo_stop and when XDP is turned off. To avoid the flag from being cleared when the device is brought down, the clearing is skipped when IFF_UP is not set. Bringing the device down should indeed not modify its features. Unfortunately, this means that clearing is also skipped when XDP is disabled _while_ the device is down. And there's nothing on the open path to bring the device features back into sync. IOW if user enables XDP, disables it and then brings the device up we'll end up with a stray GRO flag set but no NAPI instances. We don't depend on the GRO flag on the datapath, so the datapath won't crash. We will crash (or hang), however, next time features are sync'ed (either by user via ethtool or peer changing its config). The GRO flag will go away, and veth will try to disable the NAPIs. But the open path never created them since XDP was off, the GRO flag was a stray. If NAPI was initialized before we'll hang in napi_disable(). If it never was we'll crash trying to stop uninitialized hrtimer. Move the GRO flag updates to the XDP enable / disable paths, instead of mixing them with the ndo_open / ndo_close paths. Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Reported-by: Thomas Gleixner Reported-by: syzbot+039399a9b96297ddedca@syzkaller.appspotmail.com Signed-off-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/veth.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 36c5a41f84e44..dea9cc8c39f7a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1558,6 +1541,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding + * XDP is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1568,6 +1559,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; -- GitLab From 1b0998fdd85776775d975d0024bca227597e836a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 12:17:47 +0000 Subject: [PATCH 0505/2290] ipv6: fix potential "struct net" leak in inet6_rtm_getaddr() [ Upstream commit 10bfd453da64a057bcfd1a49fb6b271c48653cdb ] It seems that if userspace provides a correct IFA_TARGET_NETNSID value but no IFA_ADDRESS and IFA_LOCAL attributes, inet6_rtm_getaddr() returns -EINVAL with an elevated "struct net" refcount. Fixes: 6ecf4c37eb3e ("ipv6: enable IFA_TARGET_NETNSID for RTM_GETADDR") Signed-off-by: Eric Dumazet Cc: Christian Brauner Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 46527b5cc8f0c..1648373692a99 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5473,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) - return -EINVAL; - + if (!addr) { + err = -EINVAL; + goto errout; + } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); -- GitLab From c41548fede3d4b0305be2237ba7dbf657e9ff30b Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Feb 2024 13:38:38 +0100 Subject: [PATCH 0506/2290] lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected [ Upstream commit 0e67899abfbfdea0c3c0ed3fd263ffc601c5c157 ] Same as LAN7800, LAN7850 can be used without EEPROM. If EEPROM is not present or not flashed, LAN7850 will fail to sync the speed detected by the PHY with the MAC. In case link speed is 100Mbit, it will accidentally work, otherwise no data can be transferred. Better way would be to implement link_up callback, or set auto speed configuration unconditionally. But this changes would be more intrusive. So, for now, set it only if no EEPROM is found. Fixes: e69647a19c87 ("lan78xx: Set ASD in MAC_CR when EEE is enabled.") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240222123839.2816561-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c458c030fadf6..7b9d480e44fe4 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3035,7 +3035,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ -- GitLab From 548ab66730848c8ed105e1d7caf9f4e3f68cdc94 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 23 Feb 2024 15:59:08 -0800 Subject: [PATCH 0507/2290] veth: try harder when allocating queue memory [ Upstream commit 1ce7d306ea63f3e379557c79abd88052e0483813 ] struct veth_rq is pretty large, 832B total without debug options enabled. Since commit under Fixes we try to pre-allocate enough queues for every possible CPU. Miao Wang reports that this may lead to order-5 allocations which will fail in production. Let the allocation fallback to vmalloc() and try harder. These are the same flags we pass to netdev queue allocation. Reported-and-tested-by: Miao Wang Fixes: 9d3684c24a52 ("veth: create by default nr_possible_cpus queues") Link: https://lore.kernel.org/all/5F52CAE2-2FB7-4712-95F1-3312FBBFA8DD@gmail.com/ Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240223235908.693010-1-kuba@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/veth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index dea9cc8c39f7a..dd9f5f1461921 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1359,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1375,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) -- GitLab From d77ab053fb2f97ff366118d4dbffd8fe48168541 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 25 Feb 2024 00:20:06 +0100 Subject: [PATCH 0508/2290] net: usb: dm9601: fix wrong return value in dm9601_mdio_read [ Upstream commit c68b2c9eba38ec3f60f4894b189090febf4d8d22 ] The MII code does not check the return value of mdio_read (among others), and therefore no error code should be sent. A previous fix to the use of an uninitialized variable propagates negative error codes, that might lead to wrong operations by the MII library. An example of such issues is the use of mii_nway_restart by the dm9601 driver. The mii_nway_restart function does not check the value returned by mdio_read, which in this case might be a negative number which could contain the exact bit the function checks (BMCR_ANENABLE = 0x1000). Return zero in case of error, as it is common practice in users of mdio_read to avoid wrong uses of the return value. Fixes: 8f8abb863fa5 ("net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read") Signed-off-by: Javier Carrasco Reviewed-by: Simon Horman Reviewed-by: Peter Korsgaard Link: https://lore.kernel.org/r/20240225-dm9601_ret_err-v1-1-02c1d959ea59@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/dm9601.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a972db..8b6d6a1b3c2ec 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, -- GitLab From 1b4223e807fa17bc53062e922e4e7266450e304f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 26 Feb 2024 12:08:20 +0100 Subject: [PATCH 0509/2290] net: lan78xx: fix "softirq work is pending" error [ Upstream commit e3d5d70cb483df8296dd44e9ae3b6355ef86494c ] Disable BH around the call to napi_schedule() to avoid following error: NOHZ tick-stop error: local softirq work is pending, handler #08!!! Fixes: ec4c7e12396b ("lan78xx: Introduce NAPI polling support") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240226110820.2113584-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 7b9d480e44fe4..4fd4563811299 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; -- GitLab From aa5897232682c27ff731b083b40c879b0eb2c994 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 26 Feb 2024 13:49:21 +0100 Subject: [PATCH 0510/2290] uapi: in6: replace temporary label with rfc9486 [ Upstream commit 6a2008641920a9c6fe1abbeb9acbec463215d505 ] Not really a fix per se, but IPV6_TLV_IOAM is still tagged as "TEMPORARY IANA allocation for IOAM", while RFC 9486 is available for some time now. Just update the reference. Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Signed-off-by: Justin Iurman Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240226124921.9097-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/uapi/linux/in6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index c4c53a9ab9595..ff8d21f9e95b7 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,7 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_IOAM 49 /* RFC 9486 */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ -- GitLab From 17ccd9798fe0beda3db212cfa3ebe373f605cbd6 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 26 Feb 2024 17:42:32 +0100 Subject: [PATCH 0511/2290] stmmac: Clear variable when destroying workqueue [ Upstream commit 8af411bbba1f457c33734795f024d0ef26d0963f ] Currently when suspending driver and stopping workqueue it is checked whether workqueue is not NULL and if so, it is destroyed. Function destroy_workqueue() does drain queue and does clear variable, but it does not set workqueue variable to NULL. This can cause kernel/module panic if code attempts to clear workqueue that was not initialized. This scenario is possible when resuming suspended driver in stmmac_resume(), because there is no handling for failed stmmac_hw_setup(), which can fail and return if DMA engine has failed to initialize, and workqueue is initialized after DMA engine. Should DMA engine fail to initialize, resume will proceed normally, but interface won't work and TX queue will eventually timeout, causing 'Reset adapter' error. This then does destroy workqueue during reset process. And since workqueue is initialized after DMA engine and can be skipped, it will cause kernel/module panic. To secure against this possible crash, set workqueue variable to NULL when destroying workqueue. Log/backtrace from crash goes as follows: [88.031977]------------[ cut here ]------------ [88.031985]NETDEV WATCHDOG: eth0 (sxgmac): transmit queue 1 timed out [88.032017]WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:477 dev_watchdog+0x390/0x398 [88.032251]---[ end trace e70de432e4d5c2c0 ]--- [88.032282]sxgmac 16d88000.ethernet eth0: Reset adapter. [88.036359]------------[ cut here ]------------ [88.036519]Call trace: [88.036523] flush_workqueue+0x3e4/0x430 [88.036528] drain_workqueue+0xc4/0x160 [88.036533] destroy_workqueue+0x40/0x270 [88.036537] stmmac_fpe_stop_wq+0x4c/0x70 [88.036541] stmmac_release+0x278/0x280 [88.036546] __dev_close_many+0xcc/0x158 [88.036551] dev_close_many+0xbc/0x190 [88.036555] dev_close.part.0+0x70/0xc0 [88.036560] dev_close+0x24/0x30 [88.036564] stmmac_service_task+0x110/0x140 [88.036569] process_one_work+0x1d8/0x4a0 [88.036573] worker_thread+0x54/0x408 [88.036578] kthread+0x164/0x170 [88.036583] ret_from_fork+0x10/0x20 [88.036588]---[ end trace e70de432e4d5c2c1 ]--- [88.036597]Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 Fixes: 5a5586112b929 ("net: stmmac: support FPE link partner hand-shaking procedure") Signed-off-by: Jakub Raczynski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 91b2aa81914ba..e2d51014ab4bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3900,8 +3900,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) { set_bit(__FPE_REMOVING, &priv->fpe_task_state); - if (priv->fpe_wq) + if (priv->fpe_wq) { destroy_workqueue(priv->fpe_wq); + priv->fpe_wq = NULL; + } netdev_info(priv->dev, "FPE workqueue stop"); } -- GitLab From cad078914b628737fa0946de02169e80fba721cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Tue, 2 Jan 2024 19:08:08 +0100 Subject: [PATCH 0512/2290] Bluetooth: hci_sync: Check the correct flag before starting a scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6b3899be24b16ff8ee0cb25f0bd59b01b15ba1d1 ] There's a very confusing mistake in the code starting a HCI inquiry: We're calling hci_dev_test_flag() to test for HCI_INQUIRY, but hci_dev_test_flag() checks hdev->dev_flags instead of hdev->flags. HCI_INQUIRY is a bit that's set on hdev->flags, not on hdev->dev_flags though. HCI_INQUIRY equals the integer 7, and in hdev->dev_flags, 7 means HCI_BONDABLE, so we were actually checking for HCI_BONDABLE here. The mistake is only present in the synchronous code for starting an inquiry, not in the async one. Also devices are typically bondable while doing an inquiry, so that might be the reason why nobody noticed it so far. Fixes: abfeea476c68 ("Bluetooth: hci_sync: Convert MGMT_OP_START_DISCOVERY") Signed-off-by: Jonas Dreßler Reviewed-by: Simon Horman Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 45d19294aa772..13ed6cbfade3e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5482,7 +5482,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) bt_dev_dbg(hdev, ""); - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) + if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); -- GitLab From 45085686b9559bfbe3a4f41d3d695a520668f5e1 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Thu, 4 Jan 2024 11:56:32 +0000 Subject: [PATCH 0513/2290] Bluetooth: Avoid potential use-after-free in hci_error_reset [ Upstream commit 2449007d3f73b2842c9734f45f0aadb522daf592 ] While handling the HCI_EV_HARDWARE_ERROR event, if the underlying BT controller is not responding, the GPIO reset mechanism would free the hci_dev and lead to a use-after-free in hci_error_reset. Here's the call trace observed on a ChromeOS device with Intel AX201: queue_work_on+0x3e/0x6c __hci_cmd_sync_sk+0x2ee/0x4c0 [bluetooth ] ? init_wait_entry+0x31/0x31 __hci_cmd_sync+0x16/0x20 [bluetooth ] hci_error_reset+0x4f/0xa4 [bluetooth ] process_one_work+0x1d8/0x33f worker_thread+0x21b/0x373 kthread+0x13a/0x152 ? pr_cont_work+0x54/0x54 ? kthread_blkcg+0x31/0x31 ret_from_fork+0x1f/0x30 This patch holds the reference count on the hci_dev while processing a HCI_EV_HARDWARE_ERROR event to avoid potential crash. Fixes: c7741d16a57c ("Bluetooth: Perform a power cycle when receiving hardware error event") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6a1db678d032f..a8932d449eb63 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) -- GitLab From 926405765f25809602c52e037827d0d5a9f62692 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Jan 2024 10:43:26 -0500 Subject: [PATCH 0514/2290] Bluetooth: hci_sync: Fix accept_list when attempting to suspend [ Upstream commit e5469adb2a7e930d96813316592302d9f8f1df4e ] During suspend, only wakeable devices can be in acceptlist, so if the device was previously added it needs to be removed otherwise the device can end up waking up the system prematurely. Fixes: 3b42055388c3 ("Bluetooth: hci_sync: Fix attempting to suspend with unfiltered passive scan") Signed-off-by: Clancy Shang Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Paul Menzel Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 13ed6cbfade3e..a337340464567 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2251,8 +2251,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) + !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { + hci_le_del_accept_list_sync(hdev, ¶ms->addr, + params->addr_type); return 0; + } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) -- GitLab From 0b056a52b3adfe5fedf20cd64addbe4e1d226c95 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 9 Jan 2024 19:03:23 +0800 Subject: [PATCH 0515/2290] Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR [ Upstream commit 61a5ab72edea7ebc3ad2c6beea29d966f528ebfb ] hci_store_wake_reason() wrongly parses event HCI_Connection_Request as HCI_Connection_Complete and HCI_Connection_Complete as HCI_Connection_Request, so causes recording wakeup BD_ADDR error and potential stability issue, fix it by using the correct field. Fixes: 2f20216c1d6f ("Bluetooth: Emit controller suspend and resume events") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 56ecc5f97b916..b18f5e5df8ad0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7245,10 +7245,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, * keep track of the bdaddr of the connection event that woke us up. */ if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + bacpy(&hdev->wake_addr, &conn_request->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); + bacpy(&hdev->wake_addr, &conn_complete->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_LE_META) { struct hci_ev_le_meta *le_ev = (void *)skb->data; -- GitLab From 30a5e812f78e3d1cced90e1ed750bf027599205f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 22 Jan 2024 09:02:47 -0500 Subject: [PATCH 0516/2290] Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST [ Upstream commit 7e74aa53a68bf60f6019bd5d9a9a1406ec4d4865 ] If we received HCI_EV_IO_CAPA_REQUEST while HCI_OP_READ_REMOTE_EXT_FEATURES is yet to be responded assume the remote does support SSP since otherwise this event shouldn't be generated. Link: https://lore.kernel.org/linux-bluetooth/CABBYNZ+9UdG1cMZVmdtN3U2aS16AKMCyTARZZyFX7xTEDWcMOw@mail.gmail.com/T/#t Fixes: c7f59461f5a7 ("Bluetooth: Fix a refcnt underflow problem for hci_conn") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b18f5e5df8ad0..f79aaef5a276d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5282,9 +5282,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn || !hci_conn_ssp_enabled(conn)) + if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) goto unlock; + /* Assume remote supports SSP since it has triggered this event */ + set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + hci_conn_hold(conn); if (!hci_dev_test_flag(hdev, HCI_MGMT)) -- GitLab From 2dc94c160ef0292d7da7ea2d4c3087c852c97fcc Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 25 Jan 2024 14:50:28 +0800 Subject: [PATCH 0517/2290] Bluetooth: Enforce validation on max value of connection interval [ Upstream commit e4b019515f950b4e6e5b74b2e1bb03a90cb33039 ] Right now Linux BT stack cannot pass test case "GAP/CONN/CPUP/BV-05-C 'Connection Parameter Update Procedure Invalid Parameters Central Responder'" in Bluetooth Test Suite revision GAP.TS.p44. [0] That was revoled by commit c49a8682fc5d ("Bluetooth: validate BLE connection interval updates"), but later got reverted due to devices like keyboards and mice may require low connection interval. So only validate the max value connection interval to pass the Test Suite, and let devices to request low connection interval if needed. [0] https://www.bluetooth.org/docman/handlers/DownloadDoc.ashx?doc_id=229869 Fixes: 68d19d7d9957 ("Revert "Bluetooth: validate BLE connection interval updates"") Signed-off-by: Kai-Heng Feng Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 4 ++++ net/bluetooth/l2cap_core.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f79aaef5a276d..452d839c152fc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6719,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); + if (max > hcon->le_conn_max_interval) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 81f5974e5eb5a..b4cba55be5ad9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = hci_check_conn_params(min, max, latency, to_multiplier); + if (max > hcon->le_conn_max_interval) { + BT_DBG("requested connection interval exceeds current bounds."); + err = -EINVAL; + } else { + err = hci_check_conn_params(min, max, latency, to_multiplier); + } + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else -- GitLab From 7b410226d9eff7f64857a75d65e149440bff2b2f Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 19 Jan 2024 17:45:30 +0800 Subject: [PATCH 0518/2290] Bluetooth: qca: Fix wrong event type for patch config command [ Upstream commit c0dbc56077ae759f2dd602c7561480bc2b1b712c ] Vendor-specific command patch config has HCI_Command_Complete event as response, but qca_send_patch_config_cmd() wrongly expects vendor-specific event for the command, fixed by using right event type. Btmon log for the vendor-specific command are shown below: < HCI Command: Vendor (0x3f|0x0000) plen 5 28 01 00 00 00 > HCI Event: Command Complete (0x0e) plen 5 Vendor (0x3f|0x0000) ncmd 1 Status: Success (0x00) 28 Fixes: 4fac8a7ac80b ("Bluetooth: btqca: sequential validation") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c9064d34d8308..d7d0c9de3dc31 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); -- GitLab From eb7b5777d3c7f5dbbb0736f638068f50006d81b0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 12:13:53 +0100 Subject: [PATCH 0519/2290] Bluetooth: hci_qca: mark OF related data as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 44fac8a2fd2f72ee98ee41e6bc9ecc7765b5d3cc ] The driver can be compile tested with !CONFIG_OF making certain data unused: drivers/bluetooth/hci_qca.c:1869:37: error: ‘qca_soc_data_wcn6750’ defined but not used [-Werror=unused-const-variable=] drivers/bluetooth/hci_qca.c:1853:37: error: ‘qca_soc_data_wcn3998’ defined but not used [-Werror=unused-const-variable=] drivers/bluetooth/hci_qca.c:1841:37: error: ‘qca_soc_data_wcn3991’ defined but not used [-Werror=unused-const-variable=] drivers/bluetooth/hci_qca.c:1830:37: error: ‘qca_soc_data_wcn3990’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 76ceb8a0183d1..0e908a337e534 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1824,7 +1824,7 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; -static const struct qca_device_data qca_soc_data_wcn3990 = { +static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1835,7 +1835,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_wcn3991 = { +static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { .soc_type = QCA_WCN3991, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1847,7 +1847,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn3998 = { +static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .soc_type = QCA_WCN3998, .vregs = (struct qca_vreg []) { { "vddio", 10000 }, @@ -1858,13 +1858,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_qca6390 = { +static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { .soc_type = QCA_QCA6390, .num_vregs = 0, .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn6750 = { +static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .soc_type = QCA_WCN6750, .vregs = (struct qca_vreg []) { { "vddio", 5000 }, -- GitLab From e5383662fd02ad9516ae2c27f85cd56296372ba9 Mon Sep 17 00:00:00 2001 From: Steev Klimaszewski Date: Sun, 26 Mar 2023 18:38:10 -0500 Subject: [PATCH 0520/2290] Bluetooth: hci_qca: Add support for QTI Bluetooth chip wcn6855 [ Upstream commit 095327fede005f4b14d40b2183b2f7965c739dbd ] Add regulators, GPIOs and changes required to power on/off wcn6855. Add support for firmware download for wcn6855 which is in the linux-firmware repository as hpbtfw21.tlv and hpnv21.bin. Based on the assumption that this is similar to the wcn6750 Tested-on: BTFW.HSP.2.1.0-00538-VER_PATCHZ-1 Signed-off-by: Steev Klimaszewski Reviewed-by: Bjorn Andersson Tested-by: Bjorn Andersson Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 14 ++++++++- drivers/bluetooth/btqca.h | 10 +++++++ drivers/bluetooth/hci_qca.c | 57 ++++++++++++++++++++++++++++--------- 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d7d0c9de3dc31..4cb541096b934 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -614,6 +614,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); + } else if (soc_type == QCA_WCN6855) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpbtfw%02x.tlv", rom_ver); } else { snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); @@ -648,6 +651,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, else if (soc_type == QCA_WCN6750) snprintf(config.fwname, sizeof(config.fwname), "qca/msnv%02x.bin", rom_ver); + else if (soc_type == QCA_WCN6855) + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); else snprintf(config.fwname, sizeof(config.fwname), "qca/nvm_%08x.bin", soc_ver); @@ -685,11 +691,17 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_WCN6750: + case QCA_WCN6855: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) return err; + break; + default: + break; } bt_dev_info(hdev, "QCA setup on UART is completed"); diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 61e9a50e66ae1..b884095bcd9d0 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -147,6 +147,7 @@ enum qca_btsoc_type { QCA_WCN3991, QCA_QCA6390, QCA_WCN6750, + QCA_WCN6855, }; #if IS_ENABLED(CONFIG_BT_QCA) @@ -168,6 +169,10 @@ static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) { return soc_type == QCA_WCN6750; } +static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) +{ + return soc_type == QCA_WCN6855; +} #else @@ -206,6 +211,11 @@ static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) return false; } +static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) +{ + return false; +} + static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0e908a337e534..f217c2821b9fb 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1315,7 +1315,8 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) /* Give the controller time to process the request */ if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) + qca_is_wcn6750(qca_soc_type(hu)) || + qca_is_wcn6855(qca_soc_type(hu))) usleep_range(1000, 10000); else msleep(300); @@ -1392,7 +1393,8 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) { + qca_is_wcn6750(qca_soc_type(hu)) || + qca_is_wcn6855(qca_soc_type(hu))) { if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1426,7 +1428,8 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) * changing the baudrate of chip and host. */ if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) hci_uart_set_flow_control(hu, true); if (soc_type == QCA_WCN3990) { @@ -1444,7 +1447,8 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) error: if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) hci_uart_set_flow_control(hu, false); if (soc_type == QCA_WCN3990) { @@ -1680,7 +1684,8 @@ static int qca_power_on(struct hci_dev *hdev) return 0; if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) { ret = qca_regulator_init(hu); } else { qcadev = serdev_device_get_drvdata(hu->serdev); @@ -1721,7 +1726,8 @@ static int qca_setup(struct hci_uart *hu) bt_dev_info(hdev, "setting up %s", qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390"); + (soc_type == QCA_WCN6750) ? "wcn6750" : + (soc_type == QCA_WCN6855) ? "wcn6855" : "ROME/QCA6390"); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1733,7 +1739,8 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type)) { set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -1755,7 +1762,8 @@ retry: } if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type))) { + qca_is_wcn6750(soc_type) || + qca_is_wcn6855(soc_type))) { /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1881,6 +1889,20 @@ static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; +static const struct qca_device_data qca_soc_data_wcn6855 = { + .soc_type = QCA_WCN6855, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddbtcxmx", 126000 }, + { "vddrfacmn", 12500 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p7", 302000 }, + { "vddrfa1p2", 257000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -1910,7 +1932,7 @@ static void qca_power_shutdown(struct hci_uart *hu) host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750) { + } else if (soc_type == QCA_WCN6750 || soc_type == QCA_WCN6855) { gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -2045,7 +2067,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (data && (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type))) { + qca_is_wcn6750(data->soc_type) || + qca_is_wcn6855(data->soc_type))) { qcadev->btsoc_type = data->soc_type; qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), @@ -2065,14 +2088,18 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) { + if (IS_ERR_OR_NULL(qcadev->bt_en) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); power_ctrl_enabled = false; } qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750) + if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2148,8 +2175,9 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_power *power = qcadev->bt_power; if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) && - power->vregs_on) + qca_is_wcn6750(qcadev->btsoc_type) || + qca_is_wcn6855(qcadev->btsoc_type)) && + power->vregs_on) qca_power_shutdown(&qcadev->serdev_hu); else if (qcadev->susclk) clk_disable_unprepare(qcadev->susclk); @@ -2333,6 +2361,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, + { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -- GitLab From 29059d0f3bc21f76db5a375a70a449ba86f3d6cc Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 19 May 2023 18:43:23 +0800 Subject: [PATCH 0521/2290] Bluetooth: btqca: use le32_to_cpu for ver.soc_id [ Upstream commit 8153b738bc547878a017889d2b1cf8dd2de0e0c6 ] Use le32_to_cpu for ver.soc_id to fix the following sparse warning. drivers/bluetooth/btqca.c:640:24: sparse: warning: restricted __le32 degrades to integer Signed-off-by: Min-Hua Chen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4cb541096b934..d40a6041c48cd 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -637,7 +637,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); else if (qca_is_wcn399x(soc_type)) { - if (ver.soc_id == QCA_WCN3991_SOC_ID) { + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), "qca/crnv%02xu.bin", rom_ver); } else { -- GitLab From 940963613275a39fd693fb1969c1c6fbc0798a21 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 2 Aug 2023 08:56:29 +0200 Subject: [PATCH 0522/2290] Bluetooth: btqca: Add WCN3988 support [ Upstream commit f904feefe60c28b6852d5625adc4a2c39426a2d9 ] Add support for the Bluetooth chip codenamed APACHE which is part of WCN3988. The firmware for this chip has a slightly different naming scheme compared to most others. For ROM Version 0x0200 we need to use apbtfw10.tlv + apnv10.bin and for ROM version 0x201 apbtfw11.tlv + apnv11.bin Signed-off-by: Luca Weiss Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 13 +++++++++++-- drivers/bluetooth/btqca.h | 12 ++++++++++-- drivers/bluetooth/hci_qca.c | 12 ++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d40a6041c48cd..d775402b33df3 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -594,14 +594,20 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Firmware files to download are based on ROM version. * ROM version is derived from last two bytes of soc_ver. */ - rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); + if (soc_type == QCA_WCN3988) + rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); + else + rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); if (soc_type == QCA_WCN6750) qca_send_patch_config_cmd(hdev); /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (qca_is_wcn399x(soc_type)) { + if (soc_type == QCA_WCN3988) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/apbtfw%02x.tlv", rom_ver); + } else if (qca_is_wcn399x(soc_type)) { snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); } else if (soc_type == QCA_QCA6390) { @@ -636,6 +642,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, if (firmware_name) snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); + else if (soc_type == QCA_WCN3988) + snprintf(config.fwname, sizeof(config.fwname), + "qca/apnv%02x.bin", rom_ver); else if (qca_is_wcn399x(soc_type)) { if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index b884095bcd9d0..fc6cf314eb0ef 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -142,6 +142,7 @@ enum qca_btsoc_type { QCA_INVALID = -1, QCA_AR3002, QCA_ROME, + QCA_WCN3988, QCA_WCN3990, QCA_WCN3998, QCA_WCN3991, @@ -162,8 +163,15 @@ int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { - return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 || - soc_type == QCA_WCN3998; + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + return true; + default: + return false; + } } static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) { diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index f217c2821b9fb..746eb096c037c 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1832,6 +1832,17 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; +static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { + .soc_type = QCA_WCN3988, + .vregs = (struct qca_vreg []) { + { "vddio", 15000 }, + { "vddxo", 80000 }, + { "vddrf", 300000 }, + { "vddch0", 450000 }, + }, + .num_vregs = 4, +}; + static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { @@ -2357,6 +2368,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,qca6174-bt" }, { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, { .compatible = "qcom,qca9377-bt" }, + { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988}, { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990}, { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, -- GitLab From fc47ed389a884ee4a5b01f62ecae8137b41f63d7 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 16 Aug 2023 10:06:47 +0200 Subject: [PATCH 0523/2290] Bluetooth: qca: use switch case for soc type behavior [ Upstream commit 691d54d0f7cb14baac1ff4af210d13c0e4897e27 ] Use switch/case to handle soc type specific behaviour, the permit dropping the qca_is_xxx() inline functions and make the code clearer and easier to update for new SoCs. Suggested-by: Konrad Dybcio Suggested-by: Luiz Augusto von Dentz Signed-off-by: Neil Armstrong Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 87 +++++++++----- drivers/bluetooth/btqca.h | 36 ------ drivers/bluetooth/hci_qca.c | 233 +++++++++++++++++++++++++++--------- 3 files changed, 236 insertions(+), 120 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index d775402b33df3..8331090af86ea 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -604,26 +604,34 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (soc_type == QCA_WCN3988) { - snprintf(config.fwname, sizeof(config.fwname), - "qca/apbtfw%02x.tlv", rom_ver); - } else if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_QCA6390) { + break; + case QCA_WCN3988: + snprintf(config.fwname, sizeof(config.fwname), + "qca/apbtfw%02x.tlv", rom_ver); + break; + case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), "qca/htbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_WCN6750) { + break; + case QCA_WCN6750: /* Choose mbn file by default.If mbn file is not found * then choose tlv file */ config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); - } else if (soc_type == QCA_WCN6855) { + break; + case QCA_WCN6855: snprintf(config.fwname, sizeof(config.fwname), "qca/hpbtfw%02x.tlv", rom_ver); - } else { + break; + default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); } @@ -639,33 +647,44 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download NVM configuration */ config.type = TLV_TYPE_NVM; - if (firmware_name) + if (firmware_name) { snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); - else if (soc_type == QCA_WCN3988) - snprintf(config.fwname, sizeof(config.fwname), - "qca/apnv%02x.bin", rom_ver); - else if (qca_is_wcn399x(soc_type)) { - if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + } else { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02xu.bin", rom_ver); + } else { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02x.bin", rom_ver); + } + break; + case QCA_WCN3988: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02xu.bin", rom_ver); - } else { + "qca/apnv%02x.bin", rom_ver); + break; + case QCA_QCA6390: + snprintf(config.fwname, sizeof(config.fwname), + "qca/htnv%02x.bin", rom_ver); + break; + case QCA_WCN6750: + snprintf(config.fwname, sizeof(config.fwname), + "qca/msnv%02x.bin", rom_ver); + break; + case QCA_WCN6855: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); + break; + + default: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02x.bin", rom_ver); + "qca/nvm_%08x.bin", soc_ver); } } - else if (soc_type == QCA_QCA6390) - snprintf(config.fwname, sizeof(config.fwname), - "qca/htnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6750) - snprintf(config.fwname, sizeof(config.fwname), - "qca/msnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6855) - snprintf(config.fwname, sizeof(config.fwname), - "qca/hpnv%02x.bin", rom_ver); - else - snprintf(config.fwname, sizeof(config.fwname), - "qca/nvm_%08x.bin", soc_ver); err = qca_download_firmware(hdev, &config, soc_type, rom_ver); if (err < 0) { @@ -673,16 +692,24 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type >= QCA_WCN3991) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_QCA6390: + case QCA_WCN6750: + case QCA_WCN6855: err = qca_disable_soc_logging(hdev); if (err < 0) return err; + break; + default: + break; } /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the * VsMsftOpCode. */ switch (soc_type) { + case QCA_WCN3988: case QCA_WCN3990: case QCA_WCN3991: case QCA_WCN3998: diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index fc6cf314eb0ef..fe51c632d7720 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -161,27 +161,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - switch (soc_type) { - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: - return true; - default: - return false; - } -} -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6750; -} -static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6855; -} - #else static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) @@ -209,21 +188,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - return false; -} - -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return false; -} - -static inline bool qca_is_wcn6855(enum qca_btsoc_type soc_type) -{ - return false; -} - static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 746eb096c037c..e6ead996948a8 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -606,9 +606,18 @@ static int qca_open(struct hci_uart *hu) if (hu->serdev) { qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: hu->init_speed = qcadev->init_speed; + break; + + default: + break; + } if (qcadev->oper_speed) hu->oper_speed = qcadev->oper_speed; @@ -1314,12 +1323,19 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); /* Give the controller time to process the request */ - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu)) || - qca_is_wcn6855(qca_soc_type(hu))) + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: usleep_range(1000, 10000); - else + break; + + default: msleep(300); + } return 0; } @@ -1392,13 +1408,19 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu)) || - qca_is_wcn6855(qca_soc_type(hu))) { + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; - } else { + break; + + default: if (!qca_get_speed(hu, QCA_INIT_SPEED) || !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1427,14 +1449,28 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) /* Disable flow control for wcn3990 to deassert RTS while * changing the baudrate of chip and host. */ - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: hci_uart_set_flow_control(hu, true); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: reinit_completion(&qca->drop_ev_comp); set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } qca_baudrate = qca_get_baudrate_value(speed); @@ -1446,12 +1482,22 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) host_set_baudrate(hu, speed); error: - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: hci_uart_set_flow_control(hu, false); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: /* Wait for the controller to send the vendor event * for the baudrate change command. */ @@ -1463,6 +1509,10 @@ error: } clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } } @@ -1624,12 +1674,20 @@ static int qca_regulator_init(struct hci_uart *hu) } } - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: /* Forcefully enable wcn399x to enter in to boot mode. */ host_set_baudrate(hu, 2400); ret = qca_send_power_pulse(hu, false); if (ret) return ret; + break; + + default: + break; } /* For wcn6750 need to enable gpio bt_en */ @@ -1646,10 +1704,18 @@ static int qca_regulator_init(struct hci_uart *hu) qca_set_speed(hu, QCA_INIT_SPEED); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: ret = qca_send_power_pulse(hu, true); if (ret) return ret; + break; + + default: + break; } /* Now the device is in ready state to communicate with host. @@ -1683,11 +1749,17 @@ static int qca_power_on(struct hci_dev *hdev) if (!hu->serdev) return 0; - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: ret = qca_regulator_init(hu); - } else { + break; + + default: qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 1); @@ -1710,6 +1782,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + const char *soc_name; ret = qca_check_speeds(hu); if (ret) @@ -1724,10 +1797,26 @@ static int qca_setup(struct hci_uart *hu) */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - bt_dev_info(hdev, "setting up %s", - qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : - (soc_type == QCA_WCN6855) ? "wcn6855" : "ROME/QCA6390"); + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + soc_name = "wcn399x"; + break; + + case QCA_WCN6750: + soc_name = "wcn6750"; + break; + + case QCA_WCN6855: + soc_name = "wcn6855"; + break; + + default: + soc_name = "ROME/QCA6390"; + } + bt_dev_info(hdev, "setting up %s", soc_name); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1738,16 +1827,22 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) goto out; - } else { + break; + + default: qca_set_speed(hu, QCA_INIT_SPEED); } @@ -1761,9 +1856,16 @@ retry: qca_baudrate = qca_get_baudrate_value(speed); } - if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type) || - qca_is_wcn6855(soc_type))) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + break; + + default: /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1939,11 +2041,18 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750 || soc_type == QCA_WCN6855) { + break; + + case QCA_WCN6750: + case QCA_WCN6855: gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -1951,7 +2060,9 @@ static void qca_power_shutdown(struct hci_uart *hu) sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); } - } else if (qcadev->bt_en) { + break; + + default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2076,11 +2187,18 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); - if (data && - (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type) || - qca_is_wcn6855(data->soc_type))) { + if (data) qcadev->btsoc_type = data->soc_type; + else + qcadev->btsoc_type = QCA_ROME; + + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2124,12 +2242,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) BT_ERR("wcn3990 serdev registration failed"); return err; } - } else { - if (data) - qcadev->btsoc_type = data->soc_type; - else - qcadev->btsoc_type = QCA_ROME; + break; + default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR_OR_NULL(qcadev->bt_en)) { @@ -2185,13 +2300,23 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct qca_power *power = qcadev->bt_power; - if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type) || - qca_is_wcn6855(qcadev->btsoc_type)) && - power->vregs_on) - qca_power_shutdown(&qcadev->serdev_hu); - else if (qcadev->susclk) - clk_disable_unprepare(qcadev->susclk); + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + if (power->vregs_on) { + qca_power_shutdown(&qcadev->serdev_hu); + break; + } + fallthrough; + + default: + if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); + } hci_uart_unregister_device(&qcadev->serdev_hu); } -- GitLab From 67ffc334b92a96e65b627b6b3349c25946ff69f6 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 16 Aug 2023 10:06:48 +0200 Subject: [PATCH 0524/2290] Bluetooth: qca: add support for WCN7850 [ Upstream commit e0c1278ac89b0390fe9a74f673b6f25172292db2 ] Add support for the WCN7850 Bluetooth chipset. Tested on the SM8550 QRD platform. Signed-off-by: Neil Armstrong Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 10 ++++++++++ drivers/bluetooth/btqca.h | 1 + drivers/bluetooth/hci_qca.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8331090af86ea..0211f704a358b 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -631,6 +631,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/hpbtfw%02x.tlv", rom_ver); break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtbtfw%02x.tlv", rom_ver); + break; default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); @@ -679,6 +683,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/hpnv%02x.bin", rom_ver); break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtnv%02x.bin", rom_ver); + break; default: snprintf(config.fwname, sizeof(config.fwname), @@ -697,6 +705,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, case QCA_QCA6390: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: err = qca_disable_soc_logging(hdev); if (err < 0) return err; @@ -731,6 +740,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, case QCA_WCN3991: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index fe51c632d7720..03bff5c0059de 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -149,6 +149,7 @@ enum qca_btsoc_type { QCA_QCA6390, QCA_WCN6750, QCA_WCN6855, + QCA_WCN7850, }; #if IS_ENABLED(CONFIG_BT_QCA) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e6ead996948a8..43abdaf92a0ed 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1330,6 +1330,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: usleep_range(1000, 10000); break; @@ -1415,6 +1416,7 @@ static int qca_check_speeds(struct hci_uart *hu) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1456,6 +1458,7 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, true); break; @@ -1489,6 +1492,7 @@ error: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, false); break; @@ -1756,6 +1760,7 @@ static int qca_power_on(struct hci_dev *hdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: ret = qca_regulator_init(hu); break; @@ -1813,6 +1818,10 @@ static int qca_setup(struct hci_uart *hu) soc_name = "wcn6855"; break; + case QCA_WCN7850: + soc_name = "wcn7850"; + break; + default: soc_name = "ROME/QCA6390"; } @@ -1834,6 +1843,7 @@ retry: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -1863,6 +1873,7 @@ retry: case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: break; default: @@ -2016,6 +2027,20 @@ static const struct qca_device_data qca_soc_data_wcn6855 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; +static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { + .soc_type = QCA_WCN7850, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddaon", 26000 }, + { "vdddig", 126000 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p2", 257000 }, + { "vddrfa1p9", 302000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -2199,6 +2224,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2228,7 +2254,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) GPIOD_IN); if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) + data->soc_type == QCA_WCN6855 || + data->soc_type == QCA_WCN7850)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2307,6 +2334,7 @@ static void qca_serdev_remove(struct serdev_device *serdev) case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: + case QCA_WCN7850: if (power->vregs_on) { qca_power_shutdown(&qcadev->serdev_hu); break; @@ -2499,6 +2527,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, + { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -- GitLab From 92b8a3273f3812ba441d2a842d602ff7d33362b3 Mon Sep 17 00:00:00 2001 From: Janaki Ramaiah Thota Date: Wed, 24 Jan 2024 20:00:42 +0530 Subject: [PATCH 0525/2290] Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT [ Upstream commit 7dcd3e014aa7faeeaf4047190b22d8a19a0db696 ] BT adapter going into UNCONFIGURED state during BT turn ON when devicetree has no local-bd-address node. Bluetooth will not work out of the box on such devices, to avoid this problem, added check to set HCI_QUIRK_USE_BDADDR_PROPERTY based on local-bd-address node entry. When this quirk is not set, the public Bluetooth address read by host from controller though HCI Read BD Address command is considered as valid. Fixes: e668eb1e1578 ("Bluetooth: hci_core: Don't stop BT if the BD address missing in dts") Signed-off-by: Janaki Ramaiah Thota Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 43abdaf92a0ed..8bfef7f81b417 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1844,7 +1845,17 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); -- GitLab From ddf6ee3df30b694ac0a66b243245e5b89b6162e2 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Thu, 22 Feb 2024 10:33:08 +0000 Subject: [PATCH 0526/2290] netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate() [ Upstream commit 7e0f122c65912740327e4c54472acaa5f85868cb ] Commit d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") added some validation of NFPROTO_* families in the nft_compat module, but it broke the ability to use legacy iptables modules in dual-stack nftables. While with legacy iptables one had to independently manage IPv4 and IPv6 tables, with nftables it is possible to have dual-stack tables sharing the rules. Moreover, it was possible to use rules based on legacy iptables match/target modules in dual-stack nftables. As an example, the program from [2] creates an INET dual-stack family table using an xt_bpf based rule, which looks like the following (the actual output was generated with a patched nft tool as the current nft tool does not parse dual stack tables with legacy match rules, so consider it for illustrative purposes only): table inet testfw { chain input { type filter hook prerouting priority filter; policy accept; bytecode counter packets 0 bytes 0 accept } } After d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") we get EOPNOTSUPP for the above program. Fix this by allowing NFPROTO_INET for nft_(match/target)_validate(), but also restrict the functions to classic iptables hooks. Changes in v3: * clarify that upstream nft will not display such configuration properly and that the output was generated with a patched nft tool * remove example program from commit description and link to it instead * no code changes otherwise Changes in v2: * restrict nft_(match/target)_validate() to classic iptables hooks * rewrite example program to use unmodified libnftnl Fixes: d0009effa886 ("netfilter: nf_tables: validate NFPROTO_* family") Link: https://lore.kernel.org/all/Zc1PfoWN38UuFJRI@calendula/T/#mc947262582c90fec044c7a3398cc92fac7afea72 [1] Link: https://lore.kernel.org/all/20240220145509.53357-1-ignat@cloudflare.com/ [2] Reported-by: Jordan Griege Signed-off-by: Ignat Korchagin Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_compat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index e1623fbf36548..e4b8c02c5e6ae 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -358,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -607,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); -- GitLab From b8afc22a1160121d108d7ea8496f133804d69b93 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Feb 2023 14:45:22 +0100 Subject: [PATCH 0527/2290] netfilter: let reset rules clean out conntrack entries [ Upstream commit 2954fe60e33da0f4de4d81a4c95c7dddb517d00c ] iptables/nftables support responding to tcp packets with tcp resets. The generated tcp reset packet passes through both output and postrouting netfilter hooks, but conntrack will never see them because the generated skb has its ->nfct pointer copied over from the packet that triggered the reset rule. If the reset rule is used for established connections, this may result in the conntrack entry to be around for a very long time (default timeout is 5 days). One way to avoid this would be to not copy the nf_conn pointer so that the rest packet passes through conntrack too. Problem is that output rules might not have the same conntrack zone setup as the prerouting ones, so its possible that the reset skb won't find the correct entry. Generating a template entry for the skb seems error prone as well. Add an explicit "closing" function that switches a confirmed conntrack entry to closed state and wire this up for tcp. If the entry isn't confirmed, no action is needed because the conntrack entry will never be committed to the table. Reported-by: Russel King Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Stable-dep-of: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") Signed-off-by: Sasha Levin --- include/linux/netfilter.h | 3 +++ include/net/netfilter/nf_conntrack.h | 8 ++++++ net/ipv4/netfilter/nf_reject_ipv4.c | 1 + net/ipv6/netfilter/nf_reject_ipv6.c | 1 + net/netfilter/core.c | 16 ++++++++++++ net/netfilter/nf_conntrack_core.c | 12 +++++++++ net/netfilter/nf_conntrack_proto_tcp.c | 35 ++++++++++++++++++++++++++ 7 files changed, 76 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bef8db9d6c085..c8e03bcaecaaa 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #include void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) @@ -459,6 +461,7 @@ struct nf_ct_hook { bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + void (*set_closing)(struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa4644..3dbf947285be2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -125,6 +125,12 @@ struct nf_conn { union nf_conntrack_proto proto; }; +static inline struct nf_conn * +nf_ct_to_nf_conn(const struct nf_conntrack *nfct) +{ + return container_of(nfct, struct nf_conn, ct_general); +} + static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { @@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) void nf_ct_destroy(struct nf_conntrack *nfct); +void nf_conntrack_tcp_set_closing(struct nf_conn *ct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 4073762996e22..fc761915c5f6f 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, goto free_nskb; nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip_local_out for bridged traffic, the MAC source on diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 433d98bbe33f7..71d692728230e 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip6_local_out for bridged traffic, the MAC source on diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 55a7f72d547cd..edf92074221e2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); +void nf_ct_set_closing(struct nf_conntrack *nfct) +{ + const struct nf_ct_hook *ct_hook; + + if (!nfct) + return; + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) + ct_hook->set_closing(nfct); + + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_ct_set_closing); + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7960262966094..6d30c64a5fe86 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2772,11 +2772,23 @@ err_cachep: return ret; } +static void nf_conntrack_set_closing(struct nf_conntrack *nfct) +{ + struct nf_conn *ct = nf_ct_to_nf_conn(nfct); + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + nf_conntrack_tcp_set_closing(ct); + break; + } +} + static const struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, + .set_closing = nf_conntrack_set_closing, }; void nf_conntrack_init_end(void) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e0092bf273fd0..9480e638e5d15 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -913,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) return false; } +void nf_conntrack_tcp_set_closing(struct nf_conn *ct) +{ + enum tcp_conntrack old_state; + const unsigned int *timeouts; + u32 timeout; + + if (!nf_ct_is_confirmed(ct)) + return; + + spin_lock_bh(&ct->lock); + old_state = ct->proto.tcp.state; + ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; + + if (old_state == TCP_CONNTRACK_CLOSE || + test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + spin_unlock_bh(&ct->lock); + return; + } + + timeouts = nf_ct_timeout_lookup(ct); + if (!timeouts) { + const struct nf_tcp_net *tn; + + tn = nf_tcp_pernet(nf_ct_net(ct)); + timeouts = tn->timeouts; + } + + timeout = timeouts[TCP_CONNTRACK_CLOSE]; + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); + + spin_unlock_bh(&ct->lock); + + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); +} + static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) { state->td_end = 0; -- GitLab From 2b1414d5e94e477edff1d2c79030f1d742625ea0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2024 16:17:51 +0100 Subject: [PATCH 0528/2290] netfilter: bridge: confirm multicast packets before passing them up the stack [ Upstream commit 62e7151ae3eb465e0ab52a20c941ff33bb6332e9 ] conntrack nf_confirm logic cannot handle cloned skbs referencing the same nf_conn entry, which will happen for multicast (broadcast) frames on bridges. Example: macvlan0 | br0 / \ ethX ethY ethX (or Y) receives a L2 multicast or broadcast packet containing an IP packet, flow is not yet in conntrack table. 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. -> skb->_nfct now references a unconfirmed entry 2. skb is broad/mcast packet. bridge now passes clones out on each bridge interface. 3. skb gets passed up the stack. 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb and schedules a work queue to send them out on the lower devices. The clone skb->_nfct is not a copy, it is the same entry as the original skb. The macvlan rx handler then returns RX_HANDLER_PASS. 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. The Macvlan broadcast worker and normal confirm path will race. This race will not happen if step 2 already confirmed a clone. In that case later steps perform skb_clone() with skb->_nfct already confirmed (in hash table). This works fine. But such confirmation won't happen when eb/ip/nftables rules dropped the packets before they reached the nf_confirm step in postrouting. Pablo points out that nf_conntrack_bridge doesn't allow use of stateful nat, so we can safely discard the nf_conn entry and let inet call conntrack again. This doesn't work for bridge netfilter: skb could have a nat transformation. Also bridge nf prevents re-invocation of inet prerouting via 'sabotage_in' hook. Work around this problem by explicit confirmation of the entry at LOCAL_IN time, before upper layer has a chance to clone the unconfirmed entry. The downside is that this disables NAT and conntrack helpers. Alternative fix would be to add locking to all code parts that deal with unconfirmed packets, but even if that could be done in a sane way this opens up other problems, for example: -m physdev --physdev-out eth0 -j SNAT --snat-to 1.2.3.4 -m physdev --physdev-out eth1 -j SNAT --snat-to 1.2.3.5 For multicast case, only one of such conflicting mappings will be created, conntrack only handles 1:1 NAT mappings. Users should set create a setup that explicitly marks such traffic NOTRACK (conntrack bypass) to avoid this, but we cannot auto-bypass them, ruleset might have accept rules for untracked traffic already, so user-visible behaviour would change. Suggested-by: Pablo Neira Ayuso Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217777 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter.h | 1 + net/bridge/br_netfilter_hooks.c | 96 ++++++++++++++++++++++ net/bridge/netfilter/nf_conntrack_bridge.c | 30 +++++++ net/netfilter/nf_conntrack_core.c | 1 + 4 files changed, 128 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c8e03bcaecaaa..e5f4b6f8d1c09 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -462,6 +462,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 202ad43e35d6b..bff48d5763635 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -43,6 +43,10 @@ #include #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + static unsigned int brnf_net_id __read_mostly; struct brnf_net { @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_STOLEN; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +/* conntracks' nf_confirm logic cannot handle cloned skbs referencing + * the same nf_conn entry, which will happen for multicast (broadcast) + * Frames on bridges. + * + * Example: + * macvlan0 + * br0 + * ethX ethY + * + * ethX (or Y) receives multicast or broadcast packet containing + * an IP packet, not yet in conntrack table. + * + * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + * -> skb->_nfct now references a unconfirmed entry + * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + * interface. + * 3. skb gets passed up the stack. + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + * and schedules a work queue to send them out on the lower devices. + * + * The clone skb->_nfct is not a copy, it is the same entry as the + * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. + * + * The Macvlan broadcast worker and normal confirm path will race. + * + * This race will not happen if step 2 already confirmed a clone. In that + * case later steps perform skb_clone() with skb->_nfct already confirmed (in + * hash table). This works fine. + * + * But such confirmation won't happen when eb/ip/nftables rules dropped the + * packets before they reached the nf_confirm step in postrouting. + * + * Work around this problem by explicit confirmation of the entry at + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed + * entry. + * + */ +static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + ct = container_of(nfct, struct nf_conn, ct_general); + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. + */ + ct_hook = rcu_dereference(nf_ct_hook); + if (!ct_hook) { + skb->_nfct = 0ul; + nf_conntrack_put(nfct); + return NF_ACCEPT; + } + + nf_bridge_pull_encap_header(skb); + ret = ct_hook->confirm(skb); + switch (ret & NF_VERDICT_MASK) { + case NF_STOLEN: + return NF_STOLEN; + default: + nf_bridge_push_encap_header(skb); + break; + } + + ct = container_of(nfct, struct nf_conn, ct_general); + WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); + + return ret; +} +#endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + { + .hook = br_nf_local_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_LAST, + }, +#endif { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 06d94b2c6b5de..c7c27ada67044 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, return nf_conntrack_in(skb, &bridge_state); } +static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ + skb->_nfct = 0; + nf_ct_put(ct); + + return NF_ACCEPT; +} + static void nf_ct_bridge_frag_save(struct sk_buff *skb, struct nf_bridge_frag_data *data) { @@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, + { + .hook = nf_ct_bridge_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, { .hook = nf_ct_bridge_post, .pf = NFPROTO_BRIDGE, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6d30c64a5fe86..024f93fc8c0bb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2789,6 +2789,7 @@ static const struct nf_ct_hook nf_conntrack_hook = { .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, .set_closing = nf_conntrack_set_closing, + .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) -- GitLab From f2261eb994aa5757c1da046b78e3229a3ece0ad9 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 27 Feb 2024 20:11:28 +0800 Subject: [PATCH 0529/2290] rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back [ Upstream commit 743ad091fb46e622f1b690385bb15e3cd3daf874 ] In the commit d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length"), an adjustment was made to the old loop logic in the function `rtnl_bridge_setlink` to enable the loop to also check the length of the IFLA_BRIDGE_MODE attribute. However, this adjustment removed the `break` statement and led to an error logic of the flags writing back at the end of this function. if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); // attr should point to IFLA_BRIDGE_FLAGS NLA !!! Before the mentioned commit, the `attr` is granted to be IFLA_BRIDGE_FLAGS. However, this is not necessarily true fow now as the updated loop will let the attr point to the last NLA, even an invalid NLA which could cause overflow writes. This patch introduces a new variable `br_flag` to save the NLA pointer that points to IFLA_BRIDGE_FLAGS and uses it to resolve the mentioned error logic. Fixes: d73ef2d69c0d ("rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length") Signed-off-by: Lin Ma Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240227121128.608110-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7cf1e42d7f93b..ac379e4590f8d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); } @@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } -- GitLab From a0222b48175709b8a66e5f373d17a10ca5659cc8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 27 Feb 2024 10:49:41 -0800 Subject: [PATCH 0530/2290] igb: extend PTP timestamp adjustments to i211 [ Upstream commit 0bb7b09392eb74b152719ae87b1ba5e4bf910ef0 ] The i211 requires the same PTP timestamp adjustments as the i210, according to its datasheet. To ensure consistent timestamping across different platforms, this change extends the existing adjustments to include the i211. The adjustment result are tested and comparable for i210 and i211 based systems. Fixes: 3f544d2a4d5c ("igb: adjust PTP timestamps for Tx/Rx latency") Signed-off-by: Oleksij Rempel Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240227184942.362710-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 07171e574e7d7..36e62197fba0b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; -- GitLab From 7d4121b40149aed0698c7b82384c5c069da91836 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 28 Feb 2024 09:56:44 +0100 Subject: [PATCH 0531/2290] net: hsr: Use correct offset for HSR TLV values in supervisory HSR frames [ Upstream commit 51dd4ee0372228ffb0f7709fa7aa0678d4199d06 ] Current HSR implementation uses following supervisory frame (even for HSRv1 the HSR tag is not is not present): 00000000: 01 15 4e 00 01 2d XX YY ZZ 94 77 10 88 fb 00 01 00000010: 7e 1c 17 06 XX YY ZZ 94 77 10 1e 06 XX YY ZZ 94 00000020: 77 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 The current code adds extra two bytes (i.e. sizeof(struct hsr_sup_tlv)) when offset for skb_pull() is calculated. This is wrong, as both 'struct hsrv1_ethhdr_sp' and 'hsrv0_ethhdr_sp' already have 'struct hsr_sup_tag' defined in them, so there is no need for adding extra two bytes. This code was working correctly as with no RedBox support, the check for HSR_TLV_EOT (0x00) was off by two bytes, which were corresponding to zeroed padded bytes for minimal packet size. Fixes: eafaa88b3eb7 ("net: hsr: Add support for redbox supervision frames") Signed-off-by: Lukasz Majewski Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228085644.3618044-1-lukma@denx.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/hsr/hsr_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 80cdc6f6b34c9..0323ab5023c69 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return false; /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; + total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); -- GitLab From 40f0f326cfe6847faaa409f4883b94fcdda468ab Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 28 Feb 2024 23:43:57 +0100 Subject: [PATCH 0532/2290] tls: decrement decrypt_pending if no async completion will be called [ Upstream commit f7fa16d49837f947ee59492958f9e6f0e51d9a78 ] With mixed sync/async decryption, or failures of crypto_aead_decrypt, we increment decrypt_pending but we never do the corresponding decrement since tls_decrypt_done will not be called. In this case, we should decrement decrypt_pending immediately to avoid getting stuck. For example, the prequeue prequeue test gets stuck with mixed modes (one async decrypt + one sync decrypt). Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/c56d5fc35543891d5319f834f25622360e1bfbec.1709132643.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 93e1bfa72d791..c6ad435a44218 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -273,6 +273,8 @@ static int tls_do_decryption(struct sock *sk, return 0; ret = crypto_wait_req(ret, &ctx->async_wait); + } else if (darg->async) { + atomic_dec(&ctx->decrypt_pending); } darg->async = false; -- GitLab From 08562ca971ff6d4d30ef7eb3fe932f8bf9dcd841 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 28 Feb 2024 23:43:58 +0100 Subject: [PATCH 0533/2290] tls: fix peeking with sync+async decryption [ Upstream commit 6caaf104423d809b49a67ee6500191d063b40dc6 ] If we peek from 2 records with a currently empty rx_list, and the first record is decrypted synchronously but the second record is decrypted async, the following happens: 1. decrypt record 1 (sync) 2. copy from record 1 to the userspace's msg 3. queue the decrypted record to rx_list for future read(!PEEK) 4. decrypt record 2 (async) 5. queue record 2 to rx_list 6. call process_rx_list to copy data from the 2nd record We currently pass copied=0 as skip offset to process_rx_list, so we end up copying once again from the first record. We should skip over the data we've already copied. Seen with selftest tls.12_aes_gcm.recv_peek_large_buf_mult_recs Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/1b132d2b2b99296bfde54e8a67672d90d6d16e71.1709132643.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c6ad435a44218..2bd27b77769cb 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2023,6 +2023,7 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); @@ -2170,8 +2171,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2210,8 +2213,8 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek, NULL); + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); -- GitLab From ddc547dd05a46720866c32022300f7376c40119f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:24:40 +0100 Subject: [PATCH 0534/2290] efi/capsule-loader: fix incorrect allocation size [ Upstream commit fccfa646ef3628097d59f7d9c1a3e84d4b6bb45e ] gcc-14 notices that the allocation with sizeof(void) on 32-bit architectures is not enough for a 64-bit phys_addr_t: drivers/firmware/efi/capsule-loader.c: In function 'efi_capsule_open': drivers/firmware/efi/capsule-loader.c:295:24: error: allocation of insufficient size '4' for type 'phys_addr_t' {aka 'long long unsigned int'} with size '8' [-Werror=alloc-size] 295 | cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); | ^ Use the correct type instead here. Fixes: f24c4d478013 ("efi/capsule-loader: Reinstate virtual capsule mapping") Signed-off-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/capsule-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a8140..97bafb5f70389 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); -- GitLab From cefe18e9ec84f8fe3e198ccebb815cc996eb9797 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Feb 2024 16:51:33 +0100 Subject: [PATCH 0535/2290] power: supply: bq27xxx-i2c: Do not free non existing IRQ [ Upstream commit 2df70149e73e79783bcbc7db4fa51ecef0e2022c ] The bq27xxx i2c-client may not have an IRQ, in which case client->irq will be 0. bq27xxx_battery_i2c_probe() already has an if (client->irq) check wrapping the request_threaded_irq(). But bq27xxx_battery_i2c_remove() unconditionally calls free_irq(client->irq) leading to: [ 190.310742] ------------[ cut here ]------------ [ 190.310843] Trying to free already-free IRQ 0 [ 190.310861] WARNING: CPU: 2 PID: 1304 at kernel/irq/manage.c:1893 free_irq+0x1b8/0x310 Followed by a backtrace when unbinding the driver. Add an if (client->irq) to bq27xxx_battery_i2c_remove() mirroring probe() to fix this. Fixes: 444ff00734f3 ("power: supply: bq27xxx: Fix I2C IRQ race on remove") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240215155133.70537-1-hdegoede@redhat.com Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 0713a52a25107..17b37354e32c0 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); -- GitLab From 7f8644b6a86d45c9f8240734b161896a09069fe5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 21 Feb 2024 10:21:56 +0100 Subject: [PATCH 0536/2290] ALSA: Drop leftover snd-rtctimer stuff from Makefile [ Upstream commit 4df49712eb54141be00a9312547436d55677f092 ] We forgot to remove the line for snd-rtctimer from Makefile while dropping the functionality. Get rid of the stale line. Fixes: 34ce71a96dcb ("ALSA: timer: remove legacy rtctimer") Link: https://lore.kernel.org/r/20240221092156.28695-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/Makefile b/sound/core/Makefile index 2762f03d9b7bc..a7a1590b29526 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o snd-rawmidi-objs := rawmidi.o snd-timer-objs := timer.o snd-hrtimer-objs := hrtimer.o -snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o snd-seq-device-objs := seq_device.o -- GitLab From d36b9a1b4e5214abaf864afde5617b021b5cb588 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 23 Feb 2024 16:03:33 +0100 Subject: [PATCH 0537/2290] drm/tegra: Remove existing framebuffer only if we support display [ Upstream commit 86bf8cfda6d2a6720fa2e6e676c98f0882c9d3d7 ] Tegra DRM doesn't support display on Tegra234 and later, so make sure not to remove any existing framebuffers in that case. v2: - add comments explaining how this situation can come about - clear DRIVER_MODESET and DRIVER_ATOMIC feature bits Fixes: 6848c291a54f ("drm/aperture: Convert drivers to aperture interfaces") Signed-off-by: Thierry Reding Reviewed-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240223150333.1401582-1-thierry.reding@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/drm.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 5fc55b9777cbf..6806779f8ecce 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = tegra_drm_fb_init(drm); if (err < 0) -- GitLab From 2f91a96b892fab2f2543b4a55740c5bee36b1a6b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Feb 2024 12:44:11 +0100 Subject: [PATCH 0538/2290] fbcon: always restore the old font data in fbcon_do_set_font() [ Upstream commit 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f ] Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when vc_resize() failed) started restoring old font data upon failure (of vc_resize()). But it performs so only for user fonts. It means that the "system"/internal fonts are not restored at all. So in result, the very first call to fbcon_do_set_font() performs no restore at all upon failing vc_resize(). This can be reproduced by Syzkaller to crash the system on the next invocation of font_get(). It's rather hard to hit the allocation failure in vc_resize() on the first font_set(), but not impossible. Esp. if fault injection is used to aid the execution/failure. It was demonstrated by Sirius: BUG: unable to handle page fault for address: fffffffffffffff8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286 Call Trace: con_font_get drivers/tty/vt/vt.c:4558 [inline] con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673 vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline] vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752 tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803 vfs_ioctl fs/ioctl.c:51 [inline] ... So restore the font data in any case, not only for user fonts. Note the later 'if' is now protected by 'old_userfont' and not 'old_data' as the latter is always set now. (And it is supposed to be non-NULL. Otherwise we would see the bug above again.) Signed-off-by: Jiri Slaby (SUSE) Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed") Reported-and-tested-by: Ubisectech Sirius Cc: Ubisectech Sirius Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbcon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index fa205be94a4b8..14498a0d13e0b 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; -- GitLab From 058ed71e0f7aa3b6694ca357e23d084e5d3f2470 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Feb 2024 13:15:02 +0000 Subject: [PATCH 0539/2290] afs: Fix endless loop in directory parsing [ Upstream commit 5f7a07646655fb4108da527565dcdc80124b14c4 ] If a directory has a block with only ".__afsXXXX" files in it (from uncompleted silly-rename), these .__afsXXXX files are skipped but without advancing the file position in the dir_context. This leads to afs_dir_iterate() repeating the block again and again. Fix this by making the code that skips the .__afsXXXX file also manually advance the file position. The symptoms are a soft lookup: watchdog: BUG: soft lockup - CPU#3 stuck for 52s! [check:5737] ... RIP: 0010:afs_dir_iterate_block+0x39/0x1fd ... ? watchdog_timer_fn+0x1a6/0x213 ... ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? afs_dir_iterate_block+0x39/0x1fd afs_dir_iterate+0x10a/0x148 afs_readdir+0x30/0x4a iterate_dir+0x93/0xd3 __do_sys_getdents64+0x6b/0xd4 This is almost certainly the actual fix for: https://bugzilla.kernel.org/show_bug.cgi?id=218496 Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace") Signed-off-by: David Howells Link: https://lore.kernel.org/r/786185.1708694102@warthog.procyon.org.uk Reviewed-by: Marc Dionne cc: Marc Dionne cc: Markus Suvanto cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index cf811b77ee671..6e2c967fae6fc 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -478,8 +478,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, dire->u.name[0] == '.' && ctx->actor != afs_lookup_filldir && ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) + memcmp(dire->u.name, ".__afs", 6) == 0) { + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, -- GitLab From 8310080799b40fd9f2a8b808c657269678c149af Mon Sep 17 00:00:00 2001 From: Dimitris Vlachos Date: Thu, 29 Feb 2024 21:17:23 +0200 Subject: [PATCH 0540/2290] riscv: Sparse-Memory/vmemmap out-of-bounds fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a11dd49dcb9376776193e15641f84fcc1e5980c9 ] Offset vmemmap so that the first page of vmemmap will be mapped to the first page of physical memory in order to ensure that vmemmap’s bounds will be respected during pfn_to_page()/page_to_pfn() operations. The conversion macros will produce correct SV39/48/57 addresses for every possible/valid DRAM_BASE inside the physical memory limits. v2:Address Alex's comments Suggested-by: Alexandre Ghiti Signed-off-by: Dimitris Vlachos Reported-by: Dimitris Vlachos Closes: https://lore.kernel.org/linux-riscv/20240202135030.42265-1-csd4492@csd.uoc.gr Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240229191723.32779-1-dvlachos@ics.forth.gr Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 59bb53da473dd..63055c6ad2c25 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -79,7 +79,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START) +#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START -- GitLab From 8f626221e5fa89134515d358e7d614609b612a5c Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 23 Feb 2024 21:24:35 -0800 Subject: [PATCH 0541/2290] of: property: fw_devlink: Fix stupid bug in remote-endpoint parsing [ Upstream commit 7cb50f6c9fbaa1c0b80100b8971bf13db5d75d06 ] Introduced a stupid bug in commit 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") due to a last minute incorrect edit of "index !=0" into "!index". This patch fixes it to be "index > 0" to match the comment right next to it. Reported-by: Luca Ceresoli Link: https://lore.kernel.org/lkml/20240223171849.10f9901d@booty/ Fixes: 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") Signed-off-by: Saravana Kannan Reviewed-by: Herve Codina Reviewed-by: Luca Ceresoli Tested-by: Luca Ceresoli Link: https://lore.kernel.org/r/20240224052436.3552333-1-saravanak@google.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 33d5f16c81204..da5d712197704 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1332,7 +1332,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ - if (!index || strcmp(prop_name, "remote-endpoint")) + if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); -- GitLab From 3bfe04c1273d30b866f4c7c238331ed3b08e5824 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 1 Mar 2024 22:04:06 +0900 Subject: [PATCH 0542/2290] tomoyo: fix UAF write bug in tomoyo_write_control() commit 2f03fc340cac9ea1dc63cbf8c93dd2eb0f227815 upstream. Since tomoyo_write_control() updates head->write_buf when write() of long lines is requested, we need to fetch head->write_buf after head->io_sem is held. Otherwise, concurrent write() requests can cause use-after-free-write and double-free problems. Reported-by: Sam Sun Closes: https://lkml.kernel.org/r/CAEkJfYNDspuGxYx5kym8Lvp--D36CMDUErg4rxfWFJuPbbji8g@mail.gmail.com Fixes: bd03a3e4c9a9 ("TOMOYO: Add policy namespace support.") Cc: # Linux 3.1+ Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- security/tomoyo/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index f4cd9b58b2054..a7af085550b2d 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ -- GitLab From 8cec41a35065dcfcca5a2337f4edd56dadd1425c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 18 Feb 2024 12:30:26 +0900 Subject: [PATCH 0543/2290] ALSA: firewire-lib: fix to check cycle continuity commit 77ce96543b03f437c6b45f286d8110db2b6622a3 upstream. The local helper function to compare the given pair of cycle count evaluates them. If the left value is less than the right value, the function returns negative value. If the safe cycle is less than the current cycle, it is the case of cycle lost. However, it is not currently handled properly. This commit fixes the bug. Cc: Fixes: 705794c53b00 ("ALSA: firewire-lib: check cycle continuity") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20240218033026.72577-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 9be2260e4ca2d..f8b644cb9157a 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, // to the reason. unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); - lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); + lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); } if (lost) { dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", -- GitLab From 4cbbc2f0dbe22498e290997c52f088413d6b9ad5 Mon Sep 17 00:00:00 2001 From: Hans Peter Date: Mon, 19 Feb 2024 17:38:49 +0100 Subject: [PATCH 0544/2290] ALSA: hda/realtek: Enable Mute LED on HP 840 G8 (MB 8AB8) commit 1fdf4e8be7059e7784fec11d30cd32784f0bdc83 upstream. On my EliteBook 840 G8 Notebook PC (ProdId 5S7R6EC#ABD; built 2022 for german market) the Mute LED is always on. The mute button itself works as expected. alsa-info.sh shows a different subsystem-id 0x8ab9 for Realtek ALC285 Codec, thus the existing quirks for HP 840 G8 don't work. Therefore, add a new quirk for this type of EliteBook. Signed-off-by: Hans Peter Cc: Link: https://lore.kernel.org/r/20240219164518.4099-1-flurry123@gmx.ch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 92a656fb53212..53fe38a1b5da3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9687,6 +9687,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), -- GitLab From fd3289ab8ed1f8a2f6e3593adf39bb610fbc17a5 Mon Sep 17 00:00:00 2001 From: Eniac Zhang Date: Tue, 20 Feb 2024 17:58:12 +0000 Subject: [PATCH 0545/2290] ALSA: hda/realtek: fix mute/micmute LED For HP mt440 commit 67c3d7717efbd46092f217b1f811df1b205cce06 upstream. The HP mt440 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang Signed-off-by: Alexandru Gagniuc Cc: Link: https://lore.kernel.org/r/20240220175812.782687-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 53fe38a1b5da3..75bd7b2fa4ee6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9662,6 +9662,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), @@ -9693,6 +9694,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), -- GitLab From 59ed284c7bff4da0f6cafd05ca15de1c0ae1d087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 19 Feb 2024 20:03:45 +0100 Subject: [PATCH 0546/2290] landlock: Fix asymmetric private inodes referring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9818b3e906a0ee1ab02ea79e74a2f755fc5461a upstream. When linking or renaming a file, if only one of the source or destination directory is backed by an S_PRIVATE inode, then the related set of layer masks would be used as uninitialized by is_access_to_paths_allowed(). This would result to indeterministic access for one side instead of always being allowed. This bug could only be triggered with a mounted filesystem containing both S_PRIVATE and !S_PRIVATE inodes, which doesn't seem possible. The collect_domain_accesses() calls return early if is_nouser_or_private() returns false, which means that the directory's superblock has SB_NOUSER or its inode has S_PRIVATE. Because rename or link actions are only allowed on the same mounted filesystem, the superblock is always the same for both source and destination directories. However, it might be possible in theory to have an S_PRIVATE parent source inode with an !S_PRIVATE parent destination inode, or vice versa. To make sure this case is not an issue, explicitly initialized both set of layer masks to 0, which means to allow all actions on the related side. If at least on side has !S_PRIVATE, then collect_domain_accesses() and is_access_to_paths_allowed() check for the required access rights. Cc: Arnd Bergmann Cc: Christian Brauner Cc: Günther Noack Cc: Jann Horn Cc: Shervin Oloumi Cc: stable@vger.kernel.org Fixes: b91c3e4ea756 ("landlock: Add support for file reparenting with LANDLOCK_ACCESS_FS_REFER") Link: https://lore.kernel.org/r/20240219190345.2928627-1-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- security/landlock/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 64ed7665455fe..d328965f32f7f 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; -- GitLab From abd32d7f5c0294c1b2454c5a3b13b18446bac627 Mon Sep 17 00:00:00 2001 From: Alexander Ofitserov Date: Wed, 28 Feb 2024 14:47:03 +0300 Subject: [PATCH 0547/2290] gtp: fix use-after-free and null-ptr-deref in gtp_newlink() commit 616d82c3cfa2a2146dd7e3ae47bda7e877ee549e upstream. The gtp_link_ops operations structure for the subsystem must be registered after registering the gtp_net_ops pernet operations structure. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: [ 1010.702740] gtp: GTP module unloaded [ 1010.715877] general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] SMP KASAN NOPTI [ 1010.715888] KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] [ 1010.715895] CPU: 1 PID: 128616 Comm: a.out Not tainted 6.8.0-rc6-std-def-alt1 #1 [ 1010.715899] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 [ 1010.715908] RIP: 0010:gtp_newlink+0x4d7/0x9c0 [gtp] [ 1010.715915] Code: 80 3c 02 00 0f 85 41 04 00 00 48 8b bb d8 05 00 00 e8 ed f6 ff ff 48 89 c2 48 89 c5 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 4f 04 00 00 4c 89 e2 4c 8b 6d 00 48 b8 00 00 00 [ 1010.715920] RSP: 0018:ffff888020fbf180 EFLAGS: 00010203 [ 1010.715929] RAX: dffffc0000000000 RBX: ffff88800399c000 RCX: 0000000000000000 [ 1010.715933] RDX: 0000000000000001 RSI: ffffffff84805280 RDI: 0000000000000282 [ 1010.715938] RBP: 000000000000000d R08: 0000000000000001 R09: 0000000000000000 [ 1010.715942] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88800399cc80 [ 1010.715947] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000400 [ 1010.715953] FS: 00007fd1509ab5c0(0000) GS:ffff88805b300000(0000) knlGS:0000000000000000 [ 1010.715958] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1010.715962] CR2: 0000000000000000 CR3: 000000001c07a000 CR4: 0000000000750ee0 [ 1010.715968] PKRU: 55555554 [ 1010.715972] Call Trace: [ 1010.715985] ? __die_body.cold+0x1a/0x1f [ 1010.715995] ? die_addr+0x43/0x70 [ 1010.716002] ? exc_general_protection+0x199/0x2f0 [ 1010.716016] ? asm_exc_general_protection+0x1e/0x30 [ 1010.716026] ? gtp_newlink+0x4d7/0x9c0 [gtp] [ 1010.716034] ? gtp_net_exit+0x150/0x150 [gtp] [ 1010.716042] __rtnl_newlink+0x1063/0x1700 [ 1010.716051] ? rtnl_setlink+0x3c0/0x3c0 [ 1010.716063] ? is_bpf_text_address+0xc0/0x1f0 [ 1010.716070] ? kernel_text_address.part.0+0xbb/0xd0 [ 1010.716076] ? __kernel_text_address+0x56/0xa0 [ 1010.716084] ? unwind_get_return_address+0x5a/0xa0 [ 1010.716091] ? create_prof_cpu_mask+0x30/0x30 [ 1010.716098] ? arch_stack_walk+0x9e/0xf0 [ 1010.716106] ? stack_trace_save+0x91/0xd0 [ 1010.716113] ? stack_trace_consume_entry+0x170/0x170 [ 1010.716121] ? __lock_acquire+0x15c5/0x5380 [ 1010.716139] ? mark_held_locks+0x9e/0xe0 [ 1010.716148] ? kmem_cache_alloc_trace+0x35f/0x3c0 [ 1010.716155] ? __rtnl_newlink+0x1700/0x1700 [ 1010.716160] rtnl_newlink+0x69/0xa0 [ 1010.716166] rtnetlink_rcv_msg+0x43b/0xc50 [ 1010.716172] ? rtnl_fdb_dump+0x9f0/0x9f0 [ 1010.716179] ? lock_acquire+0x1fe/0x560 [ 1010.716188] ? netlink_deliver_tap+0x12f/0xd50 [ 1010.716196] netlink_rcv_skb+0x14d/0x440 [ 1010.716202] ? rtnl_fdb_dump+0x9f0/0x9f0 [ 1010.716208] ? netlink_ack+0xab0/0xab0 [ 1010.716213] ? netlink_deliver_tap+0x202/0xd50 [ 1010.716220] ? netlink_deliver_tap+0x218/0xd50 [ 1010.716226] ? __virt_addr_valid+0x30b/0x590 [ 1010.716233] netlink_unicast+0x54b/0x800 [ 1010.716240] ? netlink_attachskb+0x870/0x870 [ 1010.716248] ? __check_object_size+0x2de/0x3b0 [ 1010.716254] netlink_sendmsg+0x938/0xe40 [ 1010.716261] ? netlink_unicast+0x800/0x800 [ 1010.716269] ? __import_iovec+0x292/0x510 [ 1010.716276] ? netlink_unicast+0x800/0x800 [ 1010.716284] __sock_sendmsg+0x159/0x190 [ 1010.716290] ____sys_sendmsg+0x712/0x880 [ 1010.716297] ? sock_write_iter+0x3d0/0x3d0 [ 1010.716304] ? __ia32_sys_recvmmsg+0x270/0x270 [ 1010.716309] ? lock_acquire+0x1fe/0x560 [ 1010.716315] ? drain_array_locked+0x90/0x90 [ 1010.716324] ___sys_sendmsg+0xf8/0x170 [ 1010.716331] ? sendmsg_copy_msghdr+0x170/0x170 [ 1010.716337] ? lockdep_init_map_type+0x2c7/0x860 [ 1010.716343] ? lockdep_hardirqs_on_prepare+0x430/0x430 [ 1010.716350] ? debug_mutex_init+0x33/0x70 [ 1010.716360] ? percpu_counter_add_batch+0x8b/0x140 [ 1010.716367] ? lock_acquire+0x1fe/0x560 [ 1010.716373] ? find_held_lock+0x2c/0x110 [ 1010.716384] ? __fd_install+0x1b6/0x6f0 [ 1010.716389] ? lock_downgrade+0x810/0x810 [ 1010.716396] ? __fget_light+0x222/0x290 [ 1010.716403] __sys_sendmsg+0xea/0x1b0 [ 1010.716409] ? __sys_sendmsg_sock+0x40/0x40 [ 1010.716419] ? lockdep_hardirqs_on_prepare+0x2b3/0x430 [ 1010.716425] ? syscall_enter_from_user_mode+0x1d/0x60 [ 1010.716432] do_syscall_64+0x30/0x40 [ 1010.716438] entry_SYSCALL_64_after_hwframe+0x62/0xc7 [ 1010.716444] RIP: 0033:0x7fd1508cbd49 [ 1010.716452] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ef 70 0d 00 f7 d8 64 89 01 48 [ 1010.716456] RSP: 002b:00007fff18872348 EFLAGS: 00000202 ORIG_RAX: 000000000000002e [ 1010.716463] RAX: ffffffffffffffda RBX: 000055f72bf0eac0 RCX: 00007fd1508cbd49 [ 1010.716468] RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000006 [ 1010.716473] RBP: 00007fff18872360 R08: 00007fff18872360 R09: 00007fff18872360 [ 1010.716478] R10: 00007fff18872360 R11: 0000000000000202 R12: 000055f72bf0e1b0 [ 1010.716482] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 1010.716491] Modules linked in: gtp(+) udp_tunnel ib_core uinput af_packet rfkill qrtr joydev hid_generic usbhid hid kvm_intel iTCO_wdt intel_pmc_bxt iTCO_vendor_support kvm snd_hda_codec_generic ledtrig_audio irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel snd_hda_intel nls_utf8 snd_intel_dspcfg nls_cp866 psmouse aesni_intel vfat crypto_simd fat cryptd glue_helper snd_hda_codec pcspkr snd_hda_core i2c_i801 snd_hwdep i2c_smbus xhci_pci snd_pcm lpc_ich xhci_pci_renesas xhci_hcd qemu_fw_cfg tiny_power_button button sch_fq_codel vboxvideo drm_vram_helper drm_ttm_helper ttm vboxsf vboxguest snd_seq_midi snd_seq_midi_event snd_seq snd_rawmidi snd_seq_device snd_timer snd soundcore msr fuse efi_pstore dm_mod ip_tables x_tables autofs4 virtio_gpu virtio_dma_buf drm_kms_helper cec rc_core drm virtio_rng virtio_scsi rng_core virtio_balloon virtio_blk virtio_net virtio_console net_failover failover ahci libahci libata evdev scsi_mod input_leds serio_raw virtio_pci intel_agp [ 1010.716674] virtio_ring intel_gtt virtio [last unloaded: gtp] [ 1010.716693] ---[ end trace 04990a4ce61e174b ]--- Cc: stable@vger.kernel.org Signed-off-by: Alexander Ofitserov Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228114703.465107-1-oficerovas@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 937dd9cf4fbaf..7086acfed5b90 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1902,26 +1902,26 @@ static int __init gtp_init(void) get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(>p_link_ops); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto error_out; - err = register_pernet_subsys(>p_net_ops); + err = rtnl_link_register(>p_link_ops); if (err < 0) - goto unreg_rtnl_link; + goto unreg_pernet_subsys; err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_pernet_subsys; + goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_pernet_subsys: - unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; -- GitLab From 930e826962d9f01dcd2220176134427358d112f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Feb 2024 20:08:35 +0100 Subject: [PATCH 0548/2290] wifi: nl80211: reject iftype change with mesh ID change commit f78c1375339a291cba492a70eaf12ec501d28a8e upstream. It's currently possible to change the mesh ID when the interface isn't yet in mesh mode, at the same time as changing it into mesh mode. This leads to an overwrite of data in the wdev->u union for the interface type it currently has, causing cfg80211_change_iface() to do wrong things when switching. We could probably allow setting an interface to mesh while setting the mesh ID at the same time by doing a different order of operations here, but realistically there's no userspace that's going to do this, so just disallow changes in iftype when setting mesh ID. Cc: stable@vger.kernel.org Fixes: 29cbe68c516a ("cfg80211/mac80211: add mesh join/leave commands") Reported-by: syzbot+dd4779978217b1973180@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c259d3227a9e2..1a3bd554e2586 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4137,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; + if (otype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; if (netif_running(dev)) return -EBUSY; -- GitLab From c34adc20b91a8e55e048b18d63f4f4ae003ecf8f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 23 Feb 2024 16:38:43 +0000 Subject: [PATCH 0549/2290] btrfs: fix double free of anonymous device after snapshot creation failure commit e2b54eaf28df0c978626c9736b94f003b523b451 upstream. When creating a snapshot we may do a double free of an anonymous device in case there's an error committing the transaction. The second free may result in freeing an anonymous device number that was allocated by some other subsystem in the kernel or another btrfs filesystem. The steps that lead to this: 1) At ioctl.c:create_snapshot() we allocate an anonymous device number and assign it to pending_snapshot->anon_dev; 2) Then we call btrfs_commit_transaction() and end up at transaction.c:create_pending_snapshot(); 3) There we call btrfs_get_new_fs_root() and pass it the anonymous device number stored in pending_snapshot->anon_dev; 4) btrfs_get_new_fs_root() frees that anonymous device number because btrfs_lookup_fs_root() returned a root - someone else did a lookup of the new root already, which could some task doing backref walking; 5) After that some error happens in the transaction commit path, and at ioctl.c:create_snapshot() we jump to the 'fail' label, and after that we free again the same anonymous device number, which in the meanwhile may have been reallocated somewhere else, because pending_snapshot->anon_dev still has the same value as in step 1. Recently syzbot ran into this and reported the following trace: ------------[ cut here ]------------ ida_free called for id=51 which is not allocated. WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525 Modules linked in: CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525 Code: 10 42 80 3c 28 (...) RSP: 0018:ffffc90015a67300 EFLAGS: 00010246 RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000 RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4 R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246 R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246 FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0 Call Trace: btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346 create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837 create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931 btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404 create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848 btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998 btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044 __btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306 btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393 btrfs_ioctl+0xa74/0xd40 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fca3e67dda9 Code: 28 00 00 00 (...) RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9 RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003 RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658 Where we get an explicit message where we attempt to free an anonymous device number that is not currently allocated. It happens in a different code path from the example below, at btrfs_get_root_ref(), so this change may not fix the case triggered by syzbot. To fix at least the code path from the example above, change btrfs_get_root_ref() and its callers to receive a dev_t pointer argument for the anonymous device number, so that in case it frees the number, it also resets it to 0, so that up in the call chain we don't attempt to do the double free. CC: stable@vger.kernel.org # 5.10+ Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/ Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 22 +++++++++++----------- fs/btrfs/disk-io.h | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d1b05ded1e35..5756edb37c61e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1668,9 +1668,9 @@ again: * that common but still possible. In that case, we just need * to free the anon_dev. */ - if (unlikely(anon_dev)) { - free_anon_bdev(anon_dev); - anon_dev = 0; + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { @@ -1692,7 +1692,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1728,7 +1728,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1744,7 +1744,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1752,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7322af63c0cc7..24bddca86e9c9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 196e222749ccd..64b37afb7c87f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 60db4c3b82fa1..b172091f42612 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; -- GitLab From f590040ce2b712177306b03c2a63b16f7d48d3c8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 14 Feb 2024 16:19:24 +0100 Subject: [PATCH 0550/2290] btrfs: dev-replace: properly validate device names commit 9845664b9ee47ce7ee7ea93caf47d39a9d4552c4 upstream. There's a syzbot report that device name buffers passed to device replace are not properly checked for string termination which could lead to a read out of bounds in getname_kernel(). Add a helper that validates both source and target device name buffers. For devid as the source initialize the buffer to empty string in case something tries to read it later. This was originally analyzed and fixed in a different way by Edward Adam Davis (see links). Link: https://lore.kernel.org/linux-btrfs/000000000000d1a1d1060cc9c5e7@google.com/ Link: https://lore.kernel.org/linux-btrfs/tencent_44CA0665C9836EF9EEC80CB9E7E206DF5206@qq.com/ CC: stable@vger.kernel.org # 4.19+ CC: Edward Adam Davis Reported-and-tested-by: syzbot+33f23b49ac24f986c9e8@syzkaller.appspotmail.com Reviewed-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 61e58066b5fd2..9c856a73d5333 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -740,6 +740,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, -- GitLab From 444d70889d199b7f74eec45f14768a83c0b04d73 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 16 Feb 2024 22:17:10 +0000 Subject: [PATCH 0551/2290] btrfs: send: don't issue unnecessary zero writes for trailing hole commit 5897710b28cabab04ea6c7547f27b7989de646ae upstream. If we have a sparse file with a trailing hole (from the last extent's end to i_size) and then create an extent in the file that ends before the file's i_size, then when doing an incremental send we will issue a write full of zeroes for the range that starts immediately after the new extent ends up to i_size. While this isn't incorrect because the file ends up with exactly the same data, it unnecessarily results in using extra space at the destination with one or more extents full of zeroes instead of having a hole. In same cases this results in using megabytes or even gigabytes of unnecessary space. Example, reproducer: $ cat test.sh #!/bin/bash DEV=/dev/sdh MNT=/mnt/sdh mkfs.btrfs -f $DEV mount $DEV $MNT # Create 1G sparse file. xfs_io -f -c "truncate 1G" $MNT/foobar # Create base snapshot. btrfs subvolume snapshot -r $MNT $MNT/mysnap1 # Create send stream (full send) for the base snapshot. btrfs send -f /tmp/1.snap $MNT/mysnap1 # Now write one extent at the beginning of the file and one somewhere # in the middle, leaving a gap between the end of this second extent # and the file's size. xfs_io -c "pwrite -S 0xab 0 128K" \ -c "pwrite -S 0xcd 512M 128K" \ $MNT/foobar # Now create a second snapshot which is going to be used for an # incremental send operation. btrfs subvolume snapshot -r $MNT $MNT/mysnap2 # Create send stream (incremental send) for the second snapshot. btrfs send -p $MNT/mysnap1 -f /tmp/2.snap $MNT/mysnap2 # Now recreate the filesystem by receiving both send streams and # verify we get the same content that the original filesystem had # and file foobar has only two extents with a size of 128K each. umount $MNT mkfs.btrfs -f $DEV mount $DEV $MNT btrfs receive -f /tmp/1.snap $MNT btrfs receive -f /tmp/2.snap $MNT echo -e "\nFile fiemap in the second snapshot:" # Should have: # # 128K extent at file range [0, 128K[ # hole at file range [128K, 512M[ # 128K extent file range [512M, 512M + 128K[ # hole at file range [512M + 128K, 1G[ xfs_io -r -c "fiemap -v" $MNT/mysnap2/foobar # File should be using 256K of data (two 128K extents). echo -e "\nSpace used by the file: $(du -h $MNT/mysnap2/foobar | cut -f 1)" umount $MNT Running the test, we can see with fiemap that we get an extent for the range [512M, 1G[, while in the source filesystem we have an extent for the range [512M, 512M + 128K[ and a hole for the rest of the file (the range [512M + 128K, 1G[): $ ./test.sh (...) File fiemap in the second snapshot: /mnt/sdh/mysnap2/foobar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..255]: 26624..26879 256 0x0 1: [256..1048575]: hole 1048320 2: [1048576..2097151]: 2156544..3205119 1048576 0x1 Space used by the file: 513M This happens because once we finish processing an inode, at finish_inode_if_needed(), we always issue a hole (write operations full of zeros) if there's a gap between the end of the last processed extent and the file's size, even if that range is already a hole in the parent snapshot. Fix this by issuing the hole only if the range is not already a hole. After this change, running the test above, we get the expected layout: $ ./test.sh (...) File fiemap in the second snapshot: /mnt/sdh/mysnap2/foobar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..255]: 26624..26879 256 0x0 1: [256..1048575]: hole 1048320 2: [1048576..1048831]: 26880..27135 256 0x1 3: [1048832..2097151]: hole 1048320 Space used by the file: 256K A test case for fstests will follow soon. CC: stable@vger.kernel.org # 6.1+ Reported-by: Dorai Ashok S A Link: https://lore.kernel.org/linux-btrfs/c0bf7818-9c45-46a8-b3d3-513230d0c86e@inix.me/ Reviewed-by: Sweet Tea Dorminy Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a75669972dc73..9f7ffd9ef6fd7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { -- GitLab From 2e443ed55fe3ffb08327b331a9f45e9382413c94 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Aug 2023 15:06:00 -0400 Subject: [PATCH 0552/2290] Revert "drm/amd/pm: resolve reboot exception for si oland" commit 955558030954b9637b41c97b730f9b38c92ac488 upstream. This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index dc0a6fba7050f..ff1032de4f76d 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } -- GitLab From 8dafc066c54669384ce01b4bbdfe9708a085afb9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:52 +0000 Subject: [PATCH 0553/2290] drm/buddy: fix range bias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f41900e4a6ef019d64a70394b0e0c3bd048d4ec8 upstream. There is a corner case here where start/end is after/before the block range we are currently checking. If so we need to be sure that splitting the block will eventually give use the block size we need. To do that we should adjust the block range to account for the start/end, and only continue with the split if the size/alignment will fit the requested size. Not doing so can result in leaving split blocks unmerged when it eventually fails. Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function") Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: # v5.18+ Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-4-matthew.auld@intel.com Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_buddy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 7098f125b54a9..fd32041f82263 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* -- GitLab From 237ecf1afe6c22534fa43abdf2bf0b0f52de0aaa Mon Sep 17 00:00:00 2001 From: Peng Ma Date: Thu, 1 Feb 2024 16:50:07 -0500 Subject: [PATCH 0554/2290] dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read commit 9d739bccf261dd93ec1babf82f5c5d71dd4caa3e upstream. There is chip (ls1028a) errata: The SoC may hang on 16 byte unaligned read transactions by QDMA. Unaligned read transactions initiated by QDMA may stall in the NOC (Network On-Chip), causing a deadlock condition. Stalled transactions will trigger completion timeouts in PCIe controller. Workaround: Enable prefetch by setting the source descriptor prefetchable bit ( SD[PF] = 1 ). Implement this workaround. Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Peng Ma Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201215007.439503-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/fsl-qdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index f383f219ed008..915fa0cbb5539 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); + FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF; sdf->data = QDMA_SDDF_CMD(cmd); cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << -- GitLab From 034e2d70b5c7f578200ad09955aeb2aa65d1164a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 23 Feb 2024 14:20:35 +0100 Subject: [PATCH 0555/2290] crypto: arm64/neonbs - fix out-of-bounds access on short input commit 1c0cf6d19690141002889d72622b90fc01562ce4 upstream. The bit-sliced implementation of AES-CTR operates on blocks of 128 bytes, and will fall back to the plain NEON version for tail blocks or inputs that are shorter than 128 bytes to begin with. It will call straight into the plain NEON asm helper, which performs all memory accesses in granules of 16 bytes (the size of a NEON register). For this reason, the associated plain NEON glue code will copy inputs shorter than 16 bytes into a temporary buffer, given that this is a rare occurrence and it is not worth the effort to work around this in the asm code. The fallback from the bit-sliced NEON version fails to take this into account, potentially resulting in out-of-bounds accesses. So clone the same workaround, and use a temp buffer for short in/outputs. Fixes: fc074e130051 ("crypto: arm64/aes-neonbs-ctr - fallback to plain NEON for final chunk") Cc: Reported-by: syzbot+f1ceaa1a09ab891e1934@syzkaller.appspotmail.com Reviewed-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-neonbs-glue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bac4cabef6073..467ac2f768ac2 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) src += blocks * AES_BLOCK_SIZE; } if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, nbytes, walk.iv); + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); + nbytes = 0; } kernel_neon_end(); -- GitLab From 300111cd9042d133d1edd0255f50556211125ce9 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 22 Feb 2024 17:30:53 +0100 Subject: [PATCH 0556/2290] dmaengine: ptdma: use consistent DMA masks commit df2515a17914ecfc2a0594509deaf7fcb8d191ac upstream. The PTDMA driver sets DMA masks in two different places for the same device inconsistently. First call is in pt_pci_probe(), where it uses 48bit mask. The second call is in pt_dmaengine_register(), where it uses a 64bit mask. Using 64bit dma mask causes IO_PAGE_FAULT errors on DMA transfers between main memory and other devices. Without the extra call it works fine. Additionally the second call doesn't check the return value so it can silently fail. Remove the superfluous dma_set_mask() call and only use 48bit mask. Cc: stable@vger.kernel.org Fixes: b0b4a6b10577 ("dmaengine: ptdma: register PTDMA controller as a DMA resource") Reviewed-by: Basavaraj Natikar Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20240222163053.13842-1-tstruk@gigaio.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ptdma/ptdma-dmaengine.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de0f3a..f792407348077 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; -- GitLab From 474d521da890b3e3585335fb80a6044cb2553d99 Mon Sep 17 00:00:00 2001 From: Curtis Klein Date: Thu, 1 Feb 2024 17:04:06 -0500 Subject: [PATCH 0557/2290] dmaengine: fsl-qdma: init irq after reg initialization commit 87a39071e0b639f45e05d296cc0538eef44ec0bd upstream. Initialize the qDMA irqs after the registers are configured so that interrupts that may have been pending from a primary kernel don't get processed by the irq handler before it is ready to and cause panic with the following trace: Call trace: fsl_qdma_queue_handler+0xf8/0x3e8 __handle_irq_event_percpu+0x78/0x2b0 handle_irq_event_percpu+0x1c/0x68 handle_irq_event+0x44/0x78 handle_fasteoi_irq+0xc8/0x178 generic_handle_irq+0x24/0x38 __handle_domain_irq+0x90/0x100 gic_handle_irq+0x5c/0xb8 el1_irq+0xb8/0x180 _raw_spin_unlock_irqrestore+0x14/0x40 __setup_irq+0x4bc/0x798 request_threaded_irq+0xd8/0x190 devm_request_threaded_irq+0x74/0xe8 fsl_qdma_probe+0x4d4/0xca8 platform_drv_probe+0x50/0xa0 really_probe+0xe0/0x3f8 driver_probe_device+0x64/0x130 device_driver_attach+0x6c/0x78 __driver_attach+0xbc/0x158 bus_for_each_dev+0x5c/0x98 driver_attach+0x20/0x28 bus_add_driver+0x158/0x220 driver_register+0x60/0x110 __platform_driver_register+0x44/0x50 fsl_qdma_driver_init+0x18/0x20 do_one_initcall+0x48/0x258 kernel_init_freeable+0x1a4/0x23c kernel_init+0x10/0xf8 ret_from_fork+0x10/0x18 Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Curtis Klein Signed-off-by: Yi Zhao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201220406.440145-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/fsl-qdma.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 915fa0cbb5539..7082a5a6814a4 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -1203,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1245,16 +1241,19 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); + ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); + dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } - ret = fsl_qdma_reg_init(fsl_qdma); + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); if (ret) { - dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); return ret; } -- GitLab From 70af82bb9c897faa25a44e4181f36c60312b71ef Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Wed, 7 Feb 2024 15:39:51 +0100 Subject: [PATCH 0558/2290] mmc: mmci: stm32: fix DMA API overlapping mappings warning commit 6b1ba3f9040be5efc4396d86c9752cdc564730be upstream. Turning on CONFIG_DMA_API_DEBUG_SG results in the following warning: DMA-API: mmci-pl18x 48220000.mmc: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 1 PID: 51 at kernel/dma/debug.c:568 add_dma_entry+0x234/0x2f4 Modules linked in: CPU: 1 PID: 51 Comm: kworker/1:2 Not tainted 6.1.28 #1 Hardware name: STMicroelectronics STM32MP257F-EV1 Evaluation Board (DT) Workqueue: events_freezable mmc_rescan Call trace: add_dma_entry+0x234/0x2f4 debug_dma_map_sg+0x198/0x350 __dma_map_sg_attrs+0xa0/0x110 dma_map_sg_attrs+0x10/0x2c sdmmc_idma_prep_data+0x80/0xc0 mmci_prep_data+0x38/0x84 mmci_start_data+0x108/0x2dc mmci_request+0xe4/0x190 __mmc_start_request+0x68/0x140 mmc_start_request+0x94/0xc0 mmc_wait_for_req+0x70/0x100 mmc_send_tuning+0x108/0x1ac sdmmc_execute_tuning+0x14c/0x210 mmc_execute_tuning+0x48/0xec mmc_sd_init_uhs_card.part.0+0x208/0x464 mmc_sd_init_card+0x318/0x89c mmc_attach_sd+0xe4/0x180 mmc_rescan+0x244/0x320 DMA API debug brings to light leaking dma-mappings as dma_map_sg and dma_unmap_sg are not correctly balanced. If an error occurs in mmci_cmd_irq function, only mmci_dma_error function is called and as this API is not managed on stm32 variant, dma_unmap_sg is never called in this error path. Signed-off-by: Christophe Kerello Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240207143951.938144-1-christophe.kerello@foss.st.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmci_stm32_sdmmc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 60bca78a72b19..0511583ffa764 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, -- GitLab From bc9f87a41d185d7678c5742a4f5952df04bf2375 Mon Sep 17 00:00:00 2001 From: Ivan Semenov Date: Tue, 6 Feb 2024 19:28:45 +0200 Subject: [PATCH 0559/2290] mmc: core: Fix eMMC initialization with 1-bit bus connection commit ff3206d2186d84e4f77e1378ba1d225633f17b9b upstream. Initializing an eMMC that's connected via a 1-bit bus is current failing, if the HW (DT) informs that 4-bit bus is supported. In fact this is a regression, as we were earlier capable of falling back to 1-bit mode, when switching to 4/8-bit bus failed. Therefore, let's restore the behaviour. Log for Samsung eMMC 5.1 chip connected via 1bit bus (only D0 pin) Before patch: [134509.044225] mmc0: switch to bus width 4 failed [134509.044509] mmc0: new high speed MMC card at address 0001 [134509.054594] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134509.281602] mmc0: switch to bus width 4 failed [134509.282638] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.282657] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284598] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.284602] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284609] ldm_validate_partition_table(): Disk read failed. [134509.286495] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.286500] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.288303] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.288308] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289540] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.289544] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289553] mmcblk0: unable to read partition table [134509.289728] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134509.290283] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB [134509.294577] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 [134509.295835] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.295841] Buffer I/O error on dev mmcblk0, logical block 0, async page read After patch: [134551.089613] mmc0: switch to bus width 4 failed [134551.090377] mmc0: new high speed MMC card at address 0001 [134551.102271] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134551.113365] mmcblk0: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 p13 p14 p15 p16 p17 p18 p19 p20 p21 [134551.114262] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134551.114925] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB Fixes: 577fb13199b1 ("mmc: rework selection of bus speed mode") Cc: stable@vger.kernel.org Signed-off-by: Ivan Semenov Link: https://lore.kernel.org/r/20240206172845.34316-1-ivan@semenov.dev Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a46ce0868fe1f..3a927452a6501 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1007,10 +1007,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; -- GitLab From c65c475560851291ad64272d4b85b55e70c4adbb Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 21:17:14 +0200 Subject: [PATCH 0560/2290] mmc: sdhci-xenon: add timeout for PHY init complete commit 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 upstream. AC5X spec says PHY init complete bit must be polled until zero. We see cases in which timeout can take longer than the standard calculation on AC5X, which is expected following the spec comment above. According to the spec, we must wait as long as it takes for that bit to toggle on AC5X. Cap that with 100 delay loops so we won't get stuck forever. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-xenon-phy.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375de659..52a8e1217124a 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -109,6 +109,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -259,18 +261,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. + * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 -- GitLab From 4974d928d5e3909bd8cfe4b0bca2509636a8ebf2 Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 22:09:30 +0200 Subject: [PATCH 0561/2290] mmc: sdhci-xenon: fix PHY init clock stability commit 8e9f25a290ae0016353c9ea13314c95fb3207812 upstream. Each time SD/mmc phy is initialized, at times, in some of the attempts, phy fails to completes its initialization which results into timeout error. Per the HW spec, it is a pre-requisite to ensure a stable SD clock before a phy initialization is attempted. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-xenon-phy.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 52a8e1217124a..cc9d28b75eb91 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "sdhci-pltfm.h" @@ -218,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -234,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); -- GitLab From 76109a226a39aea5d621b9b0af04ba23fc9cf7de Mon Sep 17 00:00:00 2001 From: Zong Li Date: Fri, 2 Feb 2024 01:51:02 +0000 Subject: [PATCH 0562/2290] riscv: add CALLER_ADDRx support commit 680341382da56bd192ebfa4e58eaf4fec2e5bca7 upstream. CALLER_ADDRx returns caller's address at specified level, they are used for several tracers. These macros eventually use __builtin_return_address(n) to get the caller's address if arch doesn't define their own implementation. In RISC-V, __builtin_return_address(n) only works when n == 0, we need to walk the stack frame to get the caller's address at specified level. data.level started from 'level + 3' due to the call flow of getting caller's address in RISC-V implementation. If we don't have additional three iteration, the level is corresponding to follows: callsite -> return_address -> arch_stack_walk -> walk_stackframe | | | | level 3 level 2 level 1 level 0 Fixes: 10626c32e382 ("riscv/ftrace: Add basic support") Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Signed-off-by: Zong Li Link: https://lore.kernel.org/r/20240202015102.26251-1-zong.li@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/ftrace.h | 5 ++++ arch/riscv/kernel/Makefile | 2 ++ arch/riscv/kernel/return_address.c | 48 ++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 arch/riscv/kernel/return_address.c diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d47d87c2d7e3d..dcf1bc9de5841 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -25,6 +25,11 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ + +extern void *return_address(unsigned int level); + +#define ftrace_return_address(n) return_address(n) + void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index ab333cb792fd9..4c0805d264ca8 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) @@ -41,6 +42,7 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 0000000000000..c8115ec8fb304 --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include +#include +#include + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); -- GitLab From 249d6ca4ff0022a4b51a8eb9fac6d7bff2c94d1b Mon Sep 17 00:00:00 2001 From: Tim Schumacher Date: Fri, 26 Jan 2024 17:25:23 +0100 Subject: [PATCH 0563/2290] efivarfs: Request at most 512 bytes for variable names commit f45812cc23fb74bef62d4eb8a69fe7218f4b9f2a upstream. Work around a quirk in a few old (2011-ish) UEFI implementations, where a call to `GetNextVariableName` with a buffer size larger than 512 bytes will always return EFI_INVALID_PARAMETER. There is some lore around EFI variable names being up to 1024 bytes in size, but this has no basis in the UEFI specification, and the upper bounds are typically platform specific, and apply to the entire variable (name plus payload). Given that Linux does not permit creating files with names longer than NAME_MAX (255) bytes, 512 bytes (== 256 UTF-16 characters) is a reasonable limit. Cc: # 6.1+ Signed-off-by: Tim Schumacher Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- fs/efivarfs/vars.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 9e4f47808bd5a..13bc606989557 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), goto free; /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. */ do { - variable_name_size = 1024; + variable_name_size = 512; status = efivar_get_next_variable(&variable_name_size, variable_name, @@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } -- GitLab From 396a4120011d8d574eb57793efb0eec5f271a2c8 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 26 Feb 2024 17:49:57 -0800 Subject: [PATCH 0564/2290] pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation commit 2a93c6cbd5a703d44c414a3c3945a87ce11430ba upstream. Commit 'e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain")' aimed to make sure that a power-domain that is being enabled without any particular performance-state requested will at least turn the rail on, to avoid filling DeviceTree with otherwise unnecessary required-opps properties. But in the event that aggregation happens on a disabled power-domain, with an enabled peer without performance-state, both the local and peer corner are 0. The peer's enabled_corner is not considered, with the result that the underlying (shared) resource is disabled. One case where this can be observed is when the display stack keeps mmcx enabled (but without a particular performance-state vote) in order to access registers and sync_state happens in the rpmhpd driver. As mmcx_ao is flushed the state of the peer (mmcx) is not considered and mmcx_ao ends up turning off "mmcx.lvl" underneath mmcx. This has been observed several times, but has been painted over in DeviceTree by adding an explicit vote for the lowest non-disabled performance-state. Fixes: e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain") Reported-by: Johan Hovold Closes: https://lore.kernel.org/linux-arm-msm/ZdMwZa98L23mu3u6@hovoldconsulting.com/ Cc: Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240226-rpmhpd-enable-corner-fix-v1-1-68c004cec48c@quicinc.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/rpmhpd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 092f6ab09acf3..9a90f241bb97f 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -492,12 +492,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); -- GitLab From c9fa51d4c434fa7bdafd0c7a9e19cf9023787fd4 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 31 Jan 2024 01:04:28 +0100 Subject: [PATCH 0565/2290] x86/e820: Don't reserve SETUP_RNG_SEED in e820 commit 7fd817c906503b6813ea3b41f5fdf4192449a707 upstream. SETUP_RNG_SEED in setup_data is supplied by kexec and should not be reserved in the e820 map. Doing so reserves 16 bytes of RAM when booting with kexec. (16 bytes because data->len is zeroed by parse_setup_data so only sizeof(setup_data) is reserved.) When kexec is used repeatedly, each boot adds two entries in the kexec-provided e820 map as the 16-byte range splits a larger range of usable memory. Eventually all of the 128 available entries get used up. The next split will result in losing usable memory as the new entries cannot be added to the e820 map. Fixes: 68b8e9713c8e ("x86/setup: Use rng seeds from setup_data") Signed-off-by: Jiri Bohac Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/ZbmOjKnARGiaYBd5@dwarf.suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/e820.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9dac24680ff8e..993734e96615a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* - * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need - * to be reserved. + * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by + * kexec and do not need to be reserved. */ - if (data->type != SETUP_EFI && data->type != SETUP_IMA) + if (data->type != SETUP_EFI && + data->type != SETUP_IMA && + data->type != SETUP_RNG_SEED) e820__range_update_kexec(pa_data, sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); -- GitLab From 65742f4bb1f919caa564b8a20b15b8cdd6eca2ef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Feb 2024 00:09:02 +0100 Subject: [PATCH 0566/2290] x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers commit 6890cb1ace350b4386c8aee1343dc3b3ddd214da upstream. MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits. detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup. This fixes boot on TDX-enabled systems, which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo. Reported-by: Zixi Chen Signed-off-by: Paolo Bonzini Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240131230902.1867092-3-pbonzini%40redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 178 ++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 87 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4278996504833..32bd640170475 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci) } EXPORT_SYMBOL_GPL(intel_cpu_collect_info); +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_ia32_feat_ctl(c); - if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c); split_lock_init(); -- GitLab From e6e04845c2e8af9fef7d58439e9f62a3ed93f33b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:31 +0100 Subject: [PATCH 0567/2290] mptcp: fix data races on local_id commit a7cfe776637004a4c938fde78be4bd608c32c3ef upstream. The local address id is accessed lockless by the NL PM, add all the required ONCE annotation. There is a caveat: the local id can be initialized late in the subflow life-cycle, and its validity is controlled by the local_id_valid flag. Remove such flag and encode the validity in the local_id field itself with negative value before initialization. That allows accessing the field consistently with a single read operation. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/diag.c | 2 +- net/mptcp/pm_netlink.c | 6 +++--- net/mptcp/pm_userspace.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 13 +++++++++++-- net/mptcp/subflow.c | 9 +++++---- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index e57c5f47f0351..6ff6f14674aa2 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -65,7 +65,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 70a1025f093cf..3632f4830420a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -799,7 +799,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) continue; @@ -808,7 +808,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, + i, rm_id, id, subflow->remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -2028,7 +2028,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 631fa104617c3..67eccc141a6c1 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -233,7 +233,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { - if (subflow->local_id == 0) { + if (READ_ONCE(subflow->local_id) == 0) { has_id_0 = true; break; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 859b18cb8e4f6..cdabb00648bd2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->request_mptcp = 1; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); return 0; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b092205213234..2bc37773e7803 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -475,7 +475,6 @@ struct mptcp_subflow_context { can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; u32 remote_nonce; @@ -483,7 +482,7 @@ struct mptcp_subflow_context { u32 local_nonce; u32 remote_token; u8 hmac[MPTCPOPT_HMAC_LEN]; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -909,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 45d20e20cfc00..83bc438b98257 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -489,8 +489,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1867,7 +1868,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; -- GitLab From e64148635509bf13eea851986f5a0b150e5bd066 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:32 +0100 Subject: [PATCH 0568/2290] mptcp: fix data races on remote_id commit 967d3c27127e71a10ff5c083583a038606431b61 upstream. Similar to the previous patch, address the data race on remote_id, adding the suitable ONCE annotations. Fixes: bedee0b56113 ("mptcp: address lookup improvements") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/subflow.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3632f4830420a..582d0c641ed14 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -449,7 +449,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; @@ -798,18 +798,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 83bc438b98257..891c2f4fed080 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); @@ -1874,7 +1874,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; -- GitLab From fbccc5eb1652b6c4ff446f34eb5a18869b7f4f3b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:33 +0100 Subject: [PATCH 0569/2290] mptcp: fix duplicate subflow creation commit 045e9d812868a2d80b7a57b224ce8009444b7bbc upstream. Fullmesh endpoints could end-up unexpectedly generating duplicate subflows - same local and remote addresses - when multiple incoming ADD_ADDR are processed before the PM creates the subflow for the local endpoints. Address the issue explicitly checking for duplicates at subflow creation time. To avoid a quadratic computational complexity, track the unavailable remote address ids in a temporary bitmap and initialize such bitmap with the remote ids of all the existing subflows matching the local address currently processed. The above allows additionally replacing the existing code checking for duplicate entry in the current set with a simple bit test operation. Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/435 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 582d0c641ed14..3328870b0c1f8 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -407,23 +407,12 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ -static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *local, + bool fullmesh, struct mptcp_addr_info *addrs) { bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); @@ -446,6 +435,16 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); @@ -453,8 +452,11 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm if (deny_id0 && !addrs[i].id) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } @@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - nr = fill_remote_addresses_vec(msk, fullmesh, addrs); + nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); if (nr) __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); spin_unlock_bh(&msk->pm.lock); -- GitLab From 53e3f2ee8a0ce7fb33488325a74b678ddf74632a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Feb 2024 18:21:21 +0100 Subject: [PATCH 0570/2290] mptcp: continue marking the first subflow as UNCONNECTED After the 'Fixes' commit mentioned below, which is a partial backport, the MPTCP worker was no longer marking the first subflow as "UNCONNECTED" when the socket was transitioning to TCP_CLOSE state. As a result, in v6.1, it was no longer possible to reconnect to the just disconnected socket. Continue to do that like before, only for the first subflow. A few refactoring have been done around the 'msk->subflow' in later versions, and it looks like this is not needed to do that there, but still needed in v6.1. Without that, the 'disconnect' tests from the mptcp_connect.sh selftest fail: they repeat the transfer 3 times by reconnecting to the server each time. Fixes: 7857e35ef10e ("mptcp: get rid of msk->subflow") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cdabb00648bd2..125825db642cc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2440,6 +2440,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { __mptcp_subflow_disconnect(ssk, subflow, flags); + if (msk->subflow && ssk == msk->subflow->sk) + msk->subflow->state = SS_UNCONNECTED; release_sock(ssk); goto out; -- GitLab From fb7be5e5ec265a47f6763b6d772873da78bb09d0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 17:14:11 +0100 Subject: [PATCH 0571/2290] mptcp: map v4 address to v6 when destroying subflow commit 535d620ea5ff1a033dc64ee3d912acadc7470619 upstream. Address family of server side mismatches with that of client side, like in "userspace pm add & remove address" test: userspace_pm_add_addr $ns1 10.0.2.1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED That's because on the server side, the family is set to AF_INET6 and the v4 address is mapped in a v6 one. This patch fixes this issue. In mptcp_pm_nl_subflow_destroy_doit(), before checking local address family with remote address family, map an IPv4 address to an IPv6 address if the pair is a v4-mapped address. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/387 Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-1-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 67eccc141a6c1..414ed70e7ba2e 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -489,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) goto destroy_err; } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); + addr_l.family = AF_INET6; + } + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + addr_r.family = AF_INET6; + } +#endif if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; -- GitLab From 84a3c10a0c79ede027b030f61a89b6ab7cf98226 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 23 Feb 2024 17:14:14 +0100 Subject: [PATCH 0572/2290] mptcp: push at DSS boundaries commit b9cd26f640a308ea314ad23532de9a8592cd09d2 upstream. when inserting not contiguous data in the subflow write queue, the protocol creates a new skb and prevent the TCP stack from merging it later with already queued skbs by setting the EOR marker. Still no push flag is explicitly set at the end of previous GSO packet, making the aggregation on the receiver side sub-optimal - and packetdrill self-tests less predictable. Explicitly mark the end of not contiguous DSS with the push flag. Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-4-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 125825db642cc..be53e5460962e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext = skb_ext_find(skb, SKB_EXT_MPTCP); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; + tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } -- GitLab From 03ad085eb14db2ddc4de5d9474426d258dc53954 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 23 Feb 2024 17:14:17 +0100 Subject: [PATCH 0573/2290] selftests: mptcp: join: add ss mptcp support check commit 9480f388a2ef54fba911d9325372abd69a328601 upstream. Commands 'ss -M' are used in script mptcp_join.sh to display only MPTCP sockets. So it must be checked if ss tool supports MPTCP in this script. Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-7-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2107579e2939d..a20dca9d26d68 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -144,6 +144,11 @@ check_tools() exit $ksft_skip fi + if ! ss -h | grep -q MPTCP; then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip + fi + # Use the legacy version if available to support old kernel versions if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy" -- GitLab From a8722cece375838f7067aa929d89a819f7d1ae96 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 23 Feb 2024 17:14:15 +0100 Subject: [PATCH 0574/2290] mptcp: fix snd_wnd initialization for passive socket commit adf1bb78dab55e36d4d557aa2fb446ebcfe9e5ce upstream. Such value should be inherited from the first subflow, but passive sockets always used 'rsk_rcv_wnd'. Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-5-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index be53e5460962e..1fc5f6649e32d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3203,7 +3203,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; msk->snd_una = msk->write_seq; - msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { -- GitLab From d93fd40c62397326046902a2c5cb75af50882a85 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 23 Feb 2024 17:14:18 +0100 Subject: [PATCH 0575/2290] mptcp: fix double-free on socket dismantle commit 10048689def7e40a4405acda16fdc6477d4ecc5c upstream. when MPTCP server accepts an incoming connection, it clones its listener socket. However, the pointer to 'inet_opt' for the new socket has the same value as the original one: as a consequence, on program exit it's possible to observe the following splat: BUG: KASAN: double-free in inet_sock_destruct+0x54f/0x8b0 Free of addr ffff888485950880 by task swapper/25/0 CPU: 25 PID: 0 Comm: swapper/25 Kdump: loaded Not tainted 6.8.0-rc1+ #609 Hardware name: Supermicro SYS-6027R-72RF/X9DRH-7TF/7F/iTF/iF, BIOS 3.0 07/26/2013 Call Trace: dump_stack_lvl+0x32/0x50 print_report+0xca/0x620 kasan_report_invalid_free+0x64/0x90 __kasan_slab_free+0x1aa/0x1f0 kfree+0xed/0x2e0 inet_sock_destruct+0x54f/0x8b0 __sk_destruct+0x48/0x5b0 rcu_do_batch+0x34e/0xd90 rcu_core+0x559/0xac0 __do_softirq+0x183/0x5a4 irq_exit_rcu+0x12d/0x170 sysvec_apic_timer_interrupt+0x6b/0x80 asm_sysvec_apic_timer_interrupt+0x16/0x20 RIP: 0010:cpuidle_enter_state+0x175/0x300 Code: 30 00 0f 84 1f 01 00 00 83 e8 01 83 f8 ff 75 e5 48 83 c4 18 44 89 e8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc fb 45 85 ed <0f> 89 60 ff ff ff 48 c1 e5 06 48 c7 43 18 00 00 00 00 48 83 44 2b RSP: 0018:ffff888481cf7d90 EFLAGS: 00000202 RAX: 0000000000000000 RBX: ffff88887facddc8 RCX: 0000000000000000 RDX: 1ffff1110ff588b1 RSI: 0000000000000019 RDI: ffff88887fac4588 RBP: 0000000000000004 R08: 0000000000000002 R09: 0000000000043080 R10: 0009b02ea273363f R11: ffff88887fabf42b R12: ffffffff932592e0 R13: 0000000000000004 R14: 0000000000000000 R15: 00000022c880ec80 cpuidle_enter+0x4a/0xa0 do_idle+0x310/0x410 cpu_startup_entry+0x51/0x60 start_secondary+0x211/0x270 secondary_startup_64_no_verify+0x184/0x18b Allocated by task 6853: kasan_save_stack+0x1c/0x40 kasan_save_track+0x10/0x30 __kasan_kmalloc+0xa6/0xb0 __kmalloc+0x1eb/0x450 cipso_v4_sock_setattr+0x96/0x360 netlbl_sock_setattr+0x132/0x1f0 selinux_netlbl_socket_post_create+0x6c/0x110 selinux_socket_post_create+0x37b/0x7f0 security_socket_post_create+0x63/0xb0 __sock_create+0x305/0x450 __sys_socket_create.part.23+0xbd/0x130 __sys_socket+0x37/0xb0 __x64_sys_socket+0x6f/0xb0 do_syscall_64+0x83/0x160 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Freed by task 6858: kasan_save_stack+0x1c/0x40 kasan_save_track+0x10/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x12c/0x1f0 kfree+0xed/0x2e0 inet_sock_destruct+0x54f/0x8b0 __sk_destruct+0x48/0x5b0 subflow_ulp_release+0x1f0/0x250 tcp_cleanup_ulp+0x6e/0x110 tcp_v4_destroy_sock+0x5a/0x3a0 inet_csk_destroy_sock+0x135/0x390 tcp_fin+0x416/0x5c0 tcp_data_queue+0x1bc8/0x4310 tcp_rcv_state_process+0x15a3/0x47b0 tcp_v4_do_rcv+0x2c1/0x990 tcp_v4_rcv+0x41fb/0x5ed0 ip_protocol_deliver_rcu+0x6d/0x9f0 ip_local_deliver_finish+0x278/0x360 ip_local_deliver+0x182/0x2c0 ip_rcv+0xb5/0x1c0 __netif_receive_skb_one_core+0x16e/0x1b0 process_backlog+0x1e3/0x650 __napi_poll+0xa6/0x500 net_rx_action+0x740/0xbb0 __do_softirq+0x183/0x5a4 The buggy address belongs to the object at ffff888485950880 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 0 bytes inside of 64-byte region [ffff888485950880, ffff8884859508c0) The buggy address belongs to the physical page: page:0000000056d1e95e refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888485950700 pfn:0x485950 flags: 0x57ffffc0000800(slab|node=1|zone=2|lastcpupid=0x1fffff) page_type: 0xffffffff() raw: 0057ffffc0000800 ffff88810004c640 ffffea00121b8ac0 dead000000000006 raw: ffff888485950700 0000000000200019 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888485950780: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff888485950800: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff888485950880: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff888485950900: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff888485950980: 00 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc Something similar (a refcount underflow) happens with CALIPSO/IPv6. Fix this by duplicating IP / IPv6 options after clone, so that ip{,6}_sock_destruct() doesn't end up freeing the same memory area twice. Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Cc: stable@vger.kernel.org Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-8-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1fc5f6649e32d..3bc21581486ae 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3169,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, @@ -3191,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; -- GitLab From f27d319df055629480b84b9288a502337b6f2a2e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 23 Feb 2024 17:14:19 +0100 Subject: [PATCH 0576/2290] mptcp: fix possible deadlock in subflow diag commit d6a9608af9a75d13243d217f6ce1e30e57d56ffe upstream. Syzbot and Eric reported a lockdep splat in the subflow diag: WARNING: possible circular locking dependency detected 6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Not tainted syz-executor.2/24141 is trying to acquire lock: ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 but task is already holding lock: ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: inet_diag_dump_icsk+0x39f/0x1f80 net/ipv4/inet_diag.c:1038 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&h->lhash2[i].lock){+.+.}-{2:2}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __inet_hash+0x335/0xbe0 net/ipv4/inet_hashtables.c:743 inet_csk_listen_start+0x23a/0x320 net/ipv4/inet_connection_sock.c:1261 __inet_listen_sk+0x2a2/0x770 net/ipv4/af_inet.c:217 inet_listen+0xa3/0x110 net/ipv4/af_inet.c:239 rds_tcp_listen_init+0x3fd/0x5a0 net/rds/tcp_listen.c:316 rds_tcp_init_net+0x141/0x320 net/rds/tcp.c:577 ops_init+0x352/0x610 net/core/net_namespace.c:136 __register_pernet_operations net/core/net_namespace.c:1214 [inline] register_pernet_operations+0x2cb/0x660 net/core/net_namespace.c:1283 register_pernet_device+0x33/0x80 net/core/net_namespace.c:1370 rds_tcp_init+0x62/0xd0 net/rds/tcp.c:735 do_one_initcall+0x238/0x830 init/main.c:1236 do_initcall_level+0x157/0x210 init/main.c:1298 do_initcalls+0x3f/0x80 init/main.c:1314 kernel_init_freeable+0x42f/0x5d0 init/main.c:1551 kernel_init+0x1d/0x2a0 init/main.c:1441 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 -> #0 (k-sk_lock-AF_INET6){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 lock_sock_fast include/net/sock.h:1723 [inline] subflow_get_info+0x166/0xd20 net/mptcp/diag.c:28 tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 inet_sk_diag_fill+0x10ed/0x1e00 net/ipv4/inet_diag.c:345 inet_diag_dump_icsk+0x55b/0x1f80 net/ipv4/inet_diag.c:1061 __inet_diag_dump+0x211/0x3a0 net/ipv4/inet_diag.c:1263 inet_diag_dump_compat+0x1c1/0x2d0 net/ipv4/inet_diag.c:1371 netlink_dump+0x59b/0xc80 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5df/0x790 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] inet_diag_rcv_msg_compat+0x209/0x4c0 net/ipv4/inet_diag.c:1405 sock_diag_rcv_msg+0xe7/0x410 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 As noted by Eric we can break the lock dependency chain avoid dumping any extended info for the mptcp subflow listener: nothing actually useful is presented there. Fixes: b8adb69a7d29 ("mptcp: fix lockless access in subflow ULP diag") Cc: stable@vger.kernel.org Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89iJ=Oecw6OZDwmSYc9HJKQ_G32uN11L+oUcMu+TOD5Xiaw@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-9-162e87e48497@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 6ff6f14674aa2..7017dd60659dc 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; -- GitLab From 88067197e97af3fcb104dd86030f788ec1b32fdb Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 4 Jan 2023 10:01:38 +0200 Subject: [PATCH 0577/2290] RDMA/core: Refactor rdma_bind_addr commit 8d037973d48c026224ab285e6a06985ccac6f7bf upstream. Refactor rdma_bind_addr function so that it doesn't require that the cma destination address be changed before calling it. So now it will update the destination address internally only when it is really needed and after passing all the required checks. Which in turn results in a cleaner and more sensible call and error handling flows for the functions that call it directly or indirectly. Signed-off-by: Patrisious Haddad Reported-by: Wei Chen Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/3d0e9a2fd62bc10ba02fed1c7c48a48638952320.1672819273.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 253 +++++++++++++++++----------------- 1 file changed, 130 insertions(+), 123 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0773ca7ace247..950c8422aec29 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3547,121 +3547,6 @@ err: return ret; } -static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - struct sockaddr_storage zero_sock = {}; - - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - - /* - * When the src_addr is not specified, automatically supply an any addr - */ - zero_sock.ss_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = - (struct sockaddr_in6 *)&zero_sock; - struct sockaddr_in6 *dst_addr6 = - (struct sockaddr_in6 *)dst_addr; - - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = - dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *)&zero_sock)->sib_pkey = - ((struct sockaddr_ib *)dst_addr)->sib_pkey; - } - return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); -} - -/* - * If required, resolve the source address for bind and leave the id_priv in - * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior - * calls made by ULP, a previously bound ID will not be re-bound and src_addr is - * ignored. - */ -static int resolve_prepare_src(struct rdma_id_private *id_priv, - struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - int ret; - - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - /* For a well behaved ULP state will be RDMA_CM_IDLE */ - ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); - if (ret) - goto err_dst; - if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, - RDMA_CM_ADDR_QUERY))) { - ret = -EINVAL; - goto err_dst; - } - } - - if (cma_family(id_priv) != dst_addr->sa_family) { - ret = -EINVAL; - goto err_state; - } - return 0; - -err_state: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); -err_dst: - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return ret; -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) -{ - struct rdma_id_private *id_priv = - container_of(id, struct rdma_id_private, id); - int ret; - - ret = resolve_prepare_src(id_priv, src_addr, dst_addr); - if (ret) - return ret; - - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - /* - * The FSM can return back to RDMA_CM_ADDR_BOUND after - * rdma_resolve_ip() is called, eg through the error - * path in addr_handler(). If this happens the existing - * request must be canceled before issuing a new one. - * Since canceling a request is a bit slow and this - * oddball path is rare, keep track once a request has - * been issued. The track turns out to be a permanent - * state since this is the only cancel as it is - * immediately before rdma_resolve_ip(). - */ - if (id_priv->used_resolve_ip) - rdma_addr_cancel(&id->route.addr.dev_addr); - else - id_priv->used_resolve_ip = 1; - ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, - &id->route.addr.dev_addr, - timeout_ms, addr_handler, - false, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; @@ -4064,27 +3949,26 @@ err: } EXPORT_SYMBOL(rdma_listen); -int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, + struct sockaddr *addr, const struct sockaddr *daddr) { - struct rdma_id_private *id_priv; + struct sockaddr *id_daddr; int ret; - struct sockaddr *daddr; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; - id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; - ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); if (ret) goto err1; @@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; + id_daddr = cma_dst_addr(id_priv); + if (daddr != id_daddr) + memcpy(id_daddr, daddr, rdma_addr_size(addr)); + id_daddr->sa_family = addr->sa_family; ret = cma_get_port(id_priv); if (ret) @@ -4121,6 +4007,127 @@ err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } + return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); +} + +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. + */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + int ret; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) + return -EINVAL; + + } + + if (cma_family(id_priv) != dst_addr->sa_family) { + ret = -EINVAL; + goto err_state; + } + return 0; + +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; + + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, + false, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); +} EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) -- GitLab From 2d9b3e1ae1bed1f20621d5cc95e74746a4afbe7d Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 12 Jul 2023 18:41:33 -0500 Subject: [PATCH 0578/2290] RDMA/core: Update CMA destination address on rdma_resolve_addr commit 0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 upstream. 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") intoduces as regression on irdma devices on certain tests which uses rdma CM, such as cmtime. No connections can be established with the MAD QP experiences a fatal error on the active side. The cma destination address is not updated with the dst_addr when ULP on active side calls rdma_bind_addr followed by rdma_resolve_addr. The id_priv state is 'bound' in resolve_prepare_src and update is skipped. This leaves the dgid passed into irdma driver to create an Address Handle (AH) for the MAD QP at 0. The create AH descriptor as well as the ARP cache entry is invalid and HW throws an asynchronous events as result. [ 1207.656888] resolve_prepare_src caller: ucma_resolve_addr+0xff/0x170 [rdma_ucm] daddr=200.0.4.28 id_priv->state=7 [....] [ 1207.680362] ice 0000:07:00.1 rocep7s0f1: caller: irdma_create_ah+0x3e/0x70 [irdma] ah_id=0 arp_idx=0 dest_ip=0.0.0.0 destMAC=00:00:64:ca:b7:52 ipvalid=1 raw=0000:0000:0000:0000:0000:ffff:0000:0000 [ 1207.682077] ice 0000:07:00.1 rocep7s0f1: abnormal ae_id = 0x401 bool qp=1 qp_id = 1, ae_src=5 [ 1207.691657] infiniband rocep7s0f1: Fatal error (1) on MAD QP (1) Fix this by updating the CMA destination address when the ULP calls a resolve address with the CM state already bound. Fixes: 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230712234133.1343-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 950c8422aec29..067d7f42871ff 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4060,6 +4060,8 @@ static int resolve_prepare_src(struct rdma_id_private *id_priv, RDMA_CM_ADDR_QUERY))) return -EINVAL; + } else { + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); } if (cma_family(id_priv) != dst_addr->sa_family) { -- GitLab From e7945d93fece3ae43d2ed47d5ef4e254c8a3b712 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Aug 2022 11:00:16 +0200 Subject: [PATCH 0579/2290] efi: libstub: use EFI_LOADER_CODE region when moving the kernel in memory commit 9cf42bca30e98a1c6c9e8abf876940a551eaa3d1 upstream. The EFI spec is not very clear about which permissions are being given when allocating pages of a certain type. However, it is quite obvious that EFI_LOADER_CODE is more likely to permit execution than EFI_LOADER_DATA, which becomes relevant once we permit booting the kernel proper with the firmware's 1:1 mapping still active. Ostensibly, recent systems such as the Surface Pro X grant executable permissions to EFI_LOADER_CODE regions but not EFI_LOADER_DATA regions. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/alignedmem.c | 5 +++-- drivers/firmware/efi/libstub/arm64-stub.c | 6 ++++-- drivers/firmware/efi/libstub/efistub.h | 6 ++++-- drivers/firmware/efi/libstub/mem.c | 3 ++- drivers/firmware/efi/libstub/randomalloc.c | 5 +++-- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 1de9878ddd3a2..174832661251e 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -22,7 +22,8 @@ * Return: status code */ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align) + unsigned long max, unsigned long align, + int memory_type) { efi_physical_addr_t alloc_addr; efi_status_t status; @@ -36,7 +37,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, slack = align / EFI_PAGE_SIZE - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, - EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack, + memory_type, size / EFI_PAGE_SIZE + slack, &alloc_addr); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e2f90566b291a..08f46c072da56 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * locate the kernel at a randomized offset in physical memory. */ status = efi_random_alloc(*reserve_size, min_kimg_align, - reserve_addr, phys_seed); + reserve_addr, phys_seed, + EFI_LOADER_CODE); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { @@ -201,7 +202,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align); + ULONG_MAX, min_kimg_align, + EFI_LOADER_CODE); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 970e86e3aab05..ab505b07e626b 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -880,7 +880,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long random_seed); + unsigned long *addr, unsigned long random_seed, + int memory_type); efi_status_t efi_random_get_seed(void); @@ -907,7 +908,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, unsigned long max); efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align); + unsigned long max, unsigned long align, + int memory_type); efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 45841ef55a9f6..03d147f17185b 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -91,7 +91,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) return efi_allocate_pages_aligned(size, addr, max, - EFI_ALLOC_ALIGN); + EFI_ALLOC_ALIGN, + EFI_LOADER_DATA); alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 9fb5869896be7..ec44bb7e092fa 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -53,7 +53,8 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, - unsigned long random_seed) + unsigned long random_seed, + int memory_type) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -118,7 +119,7 @@ efi_status_t efi_random_alloc(unsigned long size, pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, pages, &target); + memory_type, pages, &target); if (status == EFI_SUCCESS) *addr = target; break; -- GitLab From bad6e66d0701d88a1b7018ca0334b551fb71d74a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:01 +0100 Subject: [PATCH 0580/2290] x86/boot/compressed: Rename efi_thunk_64.S to efi-mixed.S commit cb8bda8ad4438b4bcfcf89697fc84803fb210017 upstream. In preparation for moving the mixed mode specific code out of head_64.S, rename the existing file to clarify that it contains more than just the mixed mode thunk. While at it, clean up the Makefile rules that add it to the build. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-2-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 6 +++--- arch/x86/boot/compressed/{efi_thunk_64.S => efi_mixed.S} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename arch/x86/boot/compressed/{efi_thunk_64.S => efi_mixed.S} (100%) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 15b7b403a4bd0..27c82c78ac260 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -108,11 +108,11 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o -vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o -efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o +vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_mixed.S similarity index 100% rename from arch/x86/boot/compressed/efi_thunk_64.S rename to arch/x86/boot/compressed/efi_mixed.S -- GitLab From 3bad8dc0ae8db10540290c69bbb3a3f8e6e5aff4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:02 +0100 Subject: [PATCH 0581/2290] x86/boot/compressed: Move 32-bit entrypoint code into .text section commit e2ab9eab324cdf240de89741e4a1aa79919f0196 upstream. Move the code that stores the arguments passed to the EFI entrypoint into the .text section, so that it can be moved into a separate compilation unit in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-3-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 48 +++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b4bd6df29116f..61b1be41867ce 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -303,24 +303,41 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) + .text +/* + * This is the common EFI stub entry point for mixed mode. + * + * Arguments: %ecx image handle + * %edx EFI system table pointer + * %esi struct bootparams pointer (or NULL when not using + * the EFI handover protocol) + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI + * stub may still exit and return to the firmware using the Exit() EFI boot + * service.] + */ +SYM_FUNC_START_LOCAL(efi32_entry) call 1f -1: pop %ebp - subl $ rva(1b), %ebp - - movl %esi, rva(efi32_boot_args+8)(%ebp) -SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) - movl %ecx, rva(efi32_boot_args)(%ebp) - movl %edx, rva(efi32_boot_args+4)(%ebp) - movb $0, rva(efi_is64)(%ebp) +1: pop %ebx /* Save firmware GDTR and code/data selectors */ - sgdtl rva(efi32_boot_gdt)(%ebp) - movw %cs, rva(efi32_boot_cs)(%ebp) - movw %ds, rva(efi32_boot_ds)(%ebp) + sgdtl (efi32_boot_gdt - 1b)(%ebx) + movw %cs, (efi32_boot_cs - 1b)(%ebx) + movw %ds, (efi32_boot_ds - 1b)(%ebx) /* Store firmware IDT descriptor */ - sidtl rva(efi32_boot_idt)(%ebp) + sidtl (efi32_boot_idt - 1b)(%ebx) + + /* Store boot arguments */ + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) + movl %esi, 8(%ebx) + movb $0x0, 12(%ebx) // efi_is64 /* Disable paging */ movl %cr0, %eax @@ -328,7 +345,8 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) movl %eax, %cr0 jmp startup_32 -SYM_FUNC_END(efi32_stub_entry) +SYM_FUNC_END(efi32_entry) + __HEAD #endif .code64 @@ -847,7 +865,9 @@ SYM_FUNC_START(efi32_pe_entry) */ subl %esi, %ebx movl %ebx, rva(image_offset)(%ebp) // save image_offset - jmp efi32_pe_stub_entry + xorl %esi, %esi + jmp efi32_entry // pass %ecx, %edx, %esi + // no other registers remain live 2: popl %edi // restore callee-save registers popl %ebx -- GitLab From d8950e8e20e006c8cbc4cc1ff81c35921053a8a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:03 +0100 Subject: [PATCH 0582/2290] x86/boot/compressed: Move bootargs parsing out of 32-bit startup code commit 5c3a85f35b583259cf5ca0344cd79c8899ba1bb7 upstream. Move the logic that chooses between the different EFI entrypoints out of the 32-bit boot path, and into a 64-bit helper that can perform the same task much more cleanly. While at it, document the mixed mode boot flow in a code comment. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-4-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 43 ++++++++++++++++++++++++++++ arch/x86/boot/compressed/head_64.S | 24 +++------------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 67e7edcdfea8f..58ab2e1ffd92a 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -22,6 +22,49 @@ .code64 .text +/* + * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() + * is the first thing that runs after switching to long mode. Depending on + * whether the EFI handover protocol or the compat entry point was used to + * enter the kernel, it will either branch to the 64-bit EFI handover + * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF + * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a + * struct bootparams pointer as the third argument, so the presence of such a + * pointer is used to disambiguate. + * + * +--------------+ + * +------------------+ +------------+ +------>| efi_pe_entry | + * | efi32_pe_entry |---->| | | +-----------+--+ + * +------------------+ | | +------+----------------+ | + * | startup_32 |---->| startup_64_mixed_mode | | + * +------------------+ | | +------+----------------+ V + * | efi32_stub_entry |---->| | | +------------------+ + * +------------------+ +------------+ +---->| efi64_stub_entry | + * +-------------+----+ + * +------------+ +----------+ | + * | startup_64 |<----| efi_main |<--------------+ + * +------------+ +----------+ + */ +SYM_FUNC_START(startup_64_mixed_mode) + lea efi32_boot_args(%rip), %rdx + mov 0(%rdx), %edi + mov 4(%rdx), %esi + mov 8(%rdx), %edx // saved bootparams pointer + test %edx, %edx + jnz efi64_stub_entry + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + sub $40, %rsp + mov %rdi, %rcx // MS calling convention + mov %rsi, %rdx + jmp efi_pe_entry +SYM_FUNC_END(startup_64_mixed_mode) + SYM_FUNC_START(__efi64_thunk) push %rbp push %rbx diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 61b1be41867ce..71542630f00f5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -261,25 +261,9 @@ SYM_FUNC_START(startup_32) */ leal rva(startup_64)(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl rva(efi32_boot_args)(%ebp), %edi - testl %edi, %edi - jz 1f - leal rva(efi64_stub_entry)(%ebp), %eax - movl rva(efi32_boot_args+4)(%ebp), %esi - movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer - testl %edx, %edx - jnz 1f - /* - * efi_pe_entry uses MS calling convention, which requires 32 bytes of - * shadow space on the stack even if all arguments are passed in - * registers. We also need an additional 8 bytes for the space that - * would be occupied by the return address, and this also results in - * the correct stack alignment for entry. - */ - subl $40, %esp - leal rva(efi_pe_entry)(%ebp), %eax - movl %edi, %ecx // MS calling convention - movl %esi, %edx + cmpb $1, rva(efi_is64)(%ebp) + je 1f + leal rva(startup_64_mixed_mode)(%ebp), %eax 1: #endif /* Check if the C-bit position is correct when SEV is active */ @@ -795,7 +779,7 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) SYM_DATA(image_offset, .long 0) #endif #ifdef CONFIG_EFI_MIXED -SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) #define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -- GitLab From c577208f81c9ddbc5ab1418bfe810680a671fa84 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:04 +0100 Subject: [PATCH 0583/2290] x86/boot/compressed: Move efi32_pe_entry into .text section commit 91592b5c0c2f076ff9d8cc0c14aa563448ac9fc4 upstream. Move efi32_pe_entry() into the .text section, so that it can be moved out of head_64.S and into a separate compilation unit in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-5-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 71542630f00f5..14cd51d397195 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -786,7 +786,7 @@ SYM_DATA(efi_is64, .byte 1) #define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) #define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - __HEAD + .text .code32 SYM_FUNC_START(efi32_pe_entry) /* @@ -808,12 +808,11 @@ SYM_FUNC_START(efi32_pe_entry) call 1f 1: pop %ebx - subl $ rva(1b), %ebx /* Get the loaded image protocol pointer from the image handle */ leal -4(%ebp), %eax pushl %eax // &loaded_image - leal rva(loaded_image_proto)(%ebx), %eax + leal (loaded_image_proto - 1b)(%ebx), %eax pushl %eax // pass the GUID address pushl 8(%ebp) // pass the image handle @@ -842,13 +841,13 @@ SYM_FUNC_START(efi32_pe_entry) movl 12(%ebp), %edx // sys_table movl -4(%ebp), %esi // loaded_image movl LI32_image_base(%esi), %esi // loaded_image->image_base - movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry + leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 /* * We need to set the image_offset variable here since startup_32() will * use it before we get to the 64-bit efi_pe_entry() in C code. */ - subl %esi, %ebx - movl %ebx, rva(image_offset)(%ebp) // save image_offset + subl %esi, %ebp // calculate image_offset + movl %ebp, (image_offset - 1b)(%ebx) // save image_offset xorl %esi, %esi jmp efi32_entry // pass %ecx, %edx, %esi // no other registers remain live -- GitLab From 469b84516cc456fdf003dc99d9a9b0e7eab27c24 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:05 +0100 Subject: [PATCH 0584/2290] x86/boot/compressed: Move efi32_entry out of head_64.S commit 73a6dec80e2acedaef3ca603d4b5799049f6e9f8 upstream. Move the efi32_entry() routine out of head_64.S and into efi-mixed.S, which reduces clutter in the complicated startup routines. It also permits linkage of some symbols used by code to be made local. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-6-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 57 +++++++++++++++++++++++----- arch/x86/boot/compressed/head_64.S | 45 ---------------------- 2 files changed, 47 insertions(+), 55 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 58ab2e1ffd92a..3487484ac1fd5 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -105,7 +105,7 @@ SYM_FUNC_START(__efi64_thunk) /* * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT * and IDT that was installed when the kernel started executing. The - * pointers were saved at the EFI stub entry point in head_64.S. + * pointers were saved by the efi32_entry() routine below. * * Pass the saved DS selector to the 32-bit code, and use far return to * restore the saved CS selector. @@ -217,22 +217,59 @@ SYM_FUNC_START_LOCAL(efi_enter32) lret SYM_FUNC_END(efi_enter32) +/* + * This is the common EFI stub entry point for mixed mode. + * + * Arguments: %ecx image handle + * %edx EFI system table pointer + * %esi struct bootparams pointer (or NULL when not using + * the EFI handover protocol) + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI + * stub may still exit and return to the firmware using the Exit() EFI boot + * service.] + */ +SYM_FUNC_START(efi32_entry) + call 1f +1: pop %ebx + + /* Save firmware GDTR and code/data selectors */ + sgdtl (efi32_boot_gdt - 1b)(%ebx) + movw %cs, (efi32_boot_cs - 1b)(%ebx) + movw %ds, (efi32_boot_ds - 1b)(%ebx) + + /* Store firmware IDT descriptor */ + sidtl (efi32_boot_idt - 1b)(%ebx) + + /* Store boot arguments */ + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) + movl %esi, 8(%ebx) + movb $0x0, 12(%ebx) // efi_is64 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + jmp startup_32 +SYM_FUNC_END(efi32_entry) + .data .balign 8 -SYM_DATA_START(efi32_boot_gdt) +SYM_DATA_START_LOCAL(efi32_boot_gdt) .word 0 .quad 0 SYM_DATA_END(efi32_boot_gdt) -SYM_DATA_START(efi32_boot_idt) +SYM_DATA_START_LOCAL(efi32_boot_idt) .word 0 .quad 0 SYM_DATA_END(efi32_boot_idt) -SYM_DATA_START(efi32_boot_cs) - .word 0 -SYM_DATA_END(efi32_boot_cs) - -SYM_DATA_START(efi32_boot_ds) - .word 0 -SYM_DATA_END(efi32_boot_ds) +SYM_DATA_LOCAL(efi32_boot_cs, .word 0) +SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 14cd51d397195..2b812028fb2f0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -289,48 +289,6 @@ SYM_FUNC_START(efi32_stub_entry) popl %esi jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) - - .text -/* - * This is the common EFI stub entry point for mixed mode. - * - * Arguments: %ecx image handle - * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) - * - * Since this is the point of no return for ordinary execution, no registers - * are considered live except for the function parameters. [Note that the EFI - * stub may still exit and return to the firmware using the Exit() EFI boot - * service.] - */ -SYM_FUNC_START_LOCAL(efi32_entry) - call 1f -1: pop %ebx - - /* Save firmware GDTR and code/data selectors */ - sgdtl (efi32_boot_gdt - 1b)(%ebx) - movw %cs, (efi32_boot_cs - 1b)(%ebx) - movw %ds, (efi32_boot_ds - 1b)(%ebx) - - /* Store firmware IDT descriptor */ - sidtl (efi32_boot_idt - 1b)(%ebx) - - /* Store boot arguments */ - leal (efi32_boot_args - 1b)(%ebx), %ebx - movl %ecx, 0(%ebx) - movl %edx, 4(%ebx) - movl %esi, 8(%ebx) - movb $0x0, 12(%ebx) // efi_is64 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - jmp startup_32 -SYM_FUNC_END(efi32_entry) - __HEAD #endif .code64 @@ -779,9 +737,6 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) SYM_DATA(image_offset, .long 0) #endif #ifdef CONFIG_EFI_MIXED -SYM_DATA(efi32_boot_args, .long 0, 0, 0) -SYM_DATA(efi_is64, .byte 1) - #define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) #define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) #define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) -- GitLab From beeeb4655db99ed0eb70f6756518fd202fd12e1a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:06 +0100 Subject: [PATCH 0585/2290] x86/boot/compressed: Move efi32_pe_entry() out of head_64.S commit 7f22ca396778fea9332d83ec2359dbe8396e9a06 upstream. Move the implementation of efi32_pe_entry() into efi-mixed.S, which is a more suitable location that only gets built if EFI mixed mode is actually enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-7-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 82 ++++++++++++++++++++++++++ arch/x86/boot/compressed/head_64.S | 87 +--------------------------- 2 files changed, 83 insertions(+), 86 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 3487484ac1fd5..8844d8ed4b1c7 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -257,6 +257,88 @@ SYM_FUNC_START(efi32_entry) jmp startup_32 SYM_FUNC_END(efi32_entry) +#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) +#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) +#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) + +/* + * efi_status_t efi32_pe_entry(efi_handle_t image_handle, + * efi_system_table_32_t *sys_table) + */ +SYM_FUNC_START(efi32_pe_entry) + pushl %ebp + movl %esp, %ebp + pushl %eax // dummy push to allocate loaded_image + + pushl %ebx // save callee-save registers + pushl %edi + + call verify_cpu // check for long mode support + testl %eax, %eax + movl $0x80000003, %eax // EFI_UNSUPPORTED + jnz 2f + + call 1f +1: pop %ebx + + /* Get the loaded image protocol pointer from the image handle */ + leal -4(%ebp), %eax + pushl %eax // &loaded_image + leal (loaded_image_proto - 1b)(%ebx), %eax + pushl %eax // pass the GUID address + pushl 8(%ebp) // pass the image handle + + /* + * Note the alignment of the stack frame. + * sys_table + * handle <-- 16-byte aligned on entry by ABI + * return address + * frame pointer + * loaded_image <-- local variable + * saved %ebx <-- 16-byte aligned here + * saved %edi + * &loaded_image + * &loaded_image_proto + * handle <-- 16-byte aligned for call to handle_protocol + */ + + movl 12(%ebp), %eax // sys_table + movl ST32_boottime(%eax), %eax // sys_table->boottime + call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol + addl $12, %esp // restore argument space + testl %eax, %eax + jnz 2f + + movl 8(%ebp), %ecx // image_handle + movl 12(%ebp), %edx // sys_table + movl -4(%ebp), %esi // loaded_image + movl LI32_image_base(%esi), %esi // loaded_image->image_base + leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 + /* + * We need to set the image_offset variable here since startup_32() will + * use it before we get to the 64-bit efi_pe_entry() in C code. + */ + subl %esi, %ebp // calculate image_offset + movl %ebp, (image_offset - 1b)(%ebx) // save image_offset + xorl %esi, %esi + jmp efi32_entry // pass %ecx, %edx, %esi + // no other registers remain live + +2: popl %edi // restore callee-save registers + popl %ebx + leave + RET +SYM_FUNC_END(efi32_pe_entry) + + .section ".rodata" + /* EFI loaded image protocol GUID */ + .balign 4 +SYM_DATA_START_LOCAL(loaded_image_proto) + .long 0x5b1b31a1 + .word 0x9562, 0x11d2 + .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b +SYM_DATA_END(loaded_image_proto) + .data .balign 8 SYM_DATA_START_LOCAL(efi32_boot_gdt) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 2b812028fb2f0..d04d981c2b9b6 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -689,6 +689,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) jmp 1b SYM_FUNC_END(.Lno_longmode) + .globl verify_cpu #include "../../kernel/verify_cpu.S" .data @@ -736,92 +737,6 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) #ifdef CONFIG_EFI_STUB SYM_DATA(image_offset, .long 0) #endif -#ifdef CONFIG_EFI_MIXED -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - - .text - .code32 -SYM_FUNC_START(efi32_pe_entry) -/* - * efi_status_t efi32_pe_entry(efi_handle_t image_handle, - * efi_system_table_32_t *sys_table) - */ - - pushl %ebp - movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - - pushl %ebx // save callee-save registers - pushl %edi - - call verify_cpu // check for long mode support - testl %eax, %eax - movl $0x80000003, %eax // EFI_UNSUPPORTED - jnz 2f - - call 1f -1: pop %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal (loaded_image_proto - 1b)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - - movl 8(%ebp), %ecx // image_handle - movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebp // calculate image_offset - movl %ebp, (image_offset - 1b)(%ebx) // save image_offset - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi - // no other registers remain live - -2: popl %edi // restore callee-save registers - popl %ebx - leave - RET -SYM_FUNC_END(efi32_pe_entry) - - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) -#endif #ifdef CONFIG_AMD_MEM_ENCRYPT __HEAD -- GitLab From ef12d049fa7b429a0f1842307e921da30ff2e97b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:07 +0100 Subject: [PATCH 0586/2290] x86/boot/compressed, efi: Merge multiple definitions of image_offset into one commit 4b52016247aeaa55ca3e3bc2e03cd91114c145c2 upstream. There is no need for head_32.S and head_64.S both declaring a copy of the global 'image_offset' variable, so drop those and make the extern C declaration the definition. When image_offset is moved to the .c file, it needs to be placed particularly in the .data section because it lands by default in the .bss section which is cleared too late, in .Lrelocated, before the first access to it and thus garbage gets read, leading to SEV guests exploding in early boot. This happens only when the SEV guest kernel is loaded through grub. If supplied with qemu's -kernel command line option, that memory is always cleared upfront by qemu and all is fine there. [ bp: Expand commit message with SEV aspect. ] Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-8-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_32.S | 4 ---- arch/x86/boot/compressed/head_64.S | 4 ---- drivers/firmware/efi/libstub/x86-stub.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3b354eb9516df..6589ddd4cfaf2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -208,10 +208,6 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x00cf92000000ffff /* __KERNEL_DS */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d04d981c2b9b6..2235c3a13f547 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -734,10 +734,6 @@ SYM_DATA_START(boot32_idt) SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) #endif -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif - #ifdef CONFIG_AMD_MEM_ENCRYPT __HEAD .code32 diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 4f0152b11a890..9ae0d6d0c285f 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -23,7 +23,7 @@ const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -extern u32 image_offset; +u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; static efi_status_t -- GitLab From 88035744b91a187bcc23253a73ef3b1ccc08a2f9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:08 +0100 Subject: [PATCH 0587/2290] x86/boot/compressed: Simplify IDT/GDT preserve/restore in the EFI thunk commit 630f337f0c4fd80390e8600adcab31550aea33df upstream. Tweak the asm and remove some redundant instructions. While at it, fix the associated comment for style and correctness. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-9-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8844d8ed4b1c7..8b02e507d3bb0 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -96,24 +96,20 @@ SYM_FUNC_START(__efi64_thunk) leaq 0x20(%rsp), %rbx sgdt (%rbx) - - addq $16, %rbx - sidt (%rbx) + sidt 16(%rbx) leaq 1f(%rip), %rbp /* - * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT - * and IDT that was installed when the kernel started executing. The - * pointers were saved by the efi32_entry() routine below. + * Switch to IDT and GDT with 32-bit segments. These are the firmware + * GDT and IDT that were installed when the kernel started executing. + * The pointers were saved by the efi32_entry() routine below. * * Pass the saved DS selector to the 32-bit code, and use far return to * restore the saved CS selector. */ - leaq efi32_boot_idt(%rip), %rax - lidt (%rax) - leaq efi32_boot_gdt(%rip), %rax - lgdt (%rax) + lidt efi32_boot_idt(%rip) + lgdt efi32_boot_gdt(%rip) movzwl efi32_boot_ds(%rip), %edx movzwq efi32_boot_cs(%rip), %rax @@ -187,9 +183,7 @@ SYM_FUNC_START_LOCAL(efi_enter32) */ cli - lidtl (%ebx) - subl $16, %ebx - + lidtl 16(%ebx) lgdtl (%ebx) movl %cr4, %eax -- GitLab From 530a4271b7ba5776b7f5a67015ae63a3ba3d2348 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:09 +0100 Subject: [PATCH 0588/2290] x86/boot/compressed: Avoid touching ECX in startup32_set_idt_entry() commit 6aac80a8da46d70f2ae7ff97c9f45a15c7c9b3ef upstream. Avoid touching register %ecx in startup32_set_idt_entry(), by folding the MOV, SHL and ORL instructions into a single ORL which no longer requires a temp register. This permits ECX to be used as a function argument in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-10-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 2235c3a13f547..e90cbbb2903d9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -749,7 +749,6 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) */ SYM_FUNC_START(startup32_set_idt_entry) push %ebx - push %ecx /* IDT entry address to %ebx */ leal rva(boot32_idt)(%ebp), %ebx @@ -758,10 +757,8 @@ SYM_FUNC_START(startup32_set_idt_entry) /* Build IDT entry, lower 4 bytes */ movl %eax, %edx - andl $0x0000ffff, %edx # Target code segment offset [15:0] - movl $__KERNEL32_CS, %ecx # Target code segment selector - shl $16, %ecx - orl %ecx, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + orl $(__KERNEL32_CS << 16), %edx # Target code segment selector /* Store lower 4 bytes to IDT */ movl %edx, (%ebx) @@ -774,7 +771,6 @@ SYM_FUNC_START(startup32_set_idt_entry) /* Store upper 4 bytes to IDT */ movl %edx, 4(%ebx) - pop %ecx pop %ebx RET SYM_FUNC_END(startup32_set_idt_entry) -- GitLab From 29134968f72da9337dff949bba0bdb0c5134ba0a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:10 +0100 Subject: [PATCH 0589/2290] x86/boot/compressed: Pull global variable reference into startup32_load_idt() commit d73a257f7f86871c3aac24dc20538e3983096647 upstream. In preparation for moving startup32_load_idt() out of head_64.S and turning it into an ordinary function using the ordinary 32-bit calling convention, pull the global variable reference to boot32_idt up into startup32_load_idt() so that startup32_set_idt_entry() does not need to discover its own runtime physical address, which will no longer be correlated with startup_32 once this code is moved into .text. While at it, give startup32_set_idt_entry() static linkage. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-11-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e90cbbb2903d9..9d1e1ccab2566 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -744,16 +744,11 @@ SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) * * %eax: Handler address * %edx: Vector number - * - * Physical offset is expected in %ebp + * %ecx: IDT address */ -SYM_FUNC_START(startup32_set_idt_entry) - push %ebx - - /* IDT entry address to %ebx */ - leal rva(boot32_idt)(%ebp), %ebx - shl $3, %edx - addl %edx, %ebx +SYM_FUNC_START_LOCAL(startup32_set_idt_entry) + /* IDT entry address to %ecx */ + leal (%ecx, %edx, 8), %ecx /* Build IDT entry, lower 4 bytes */ movl %eax, %edx @@ -761,7 +756,7 @@ SYM_FUNC_START(startup32_set_idt_entry) orl $(__KERNEL32_CS << 16), %edx # Target code segment selector /* Store lower 4 bytes to IDT */ - movl %edx, (%ebx) + movl %edx, (%ecx) /* Build IDT entry, upper 4 bytes */ movl %eax, %edx @@ -769,15 +764,16 @@ SYM_FUNC_START(startup32_set_idt_entry) orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate /* Store upper 4 bytes to IDT */ - movl %edx, 4(%ebx) + movl %edx, 4(%ecx) - pop %ebx RET SYM_FUNC_END(startup32_set_idt_entry) #endif SYM_FUNC_START(startup32_load_idt) #ifdef CONFIG_AMD_MEM_ENCRYPT + leal rva(boot32_idt)(%ebp), %ecx + /* #VC handler */ leal rva(startup32_vc_handler)(%ebp), %eax movl $X86_TRAP_VC, %edx -- GitLab From 2e47116315a08bd5fa451bbeb66cb14ffc3f0de1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:11 +0100 Subject: [PATCH 0590/2290] x86/boot/compressed: Move startup32_load_idt() into .text section commit c6355995ba471d7ad574174e593192ce805c7e1a upstream. Convert startup32_load_idt() into an ordinary function and move it into the .text section. This involves turning the rva() immediates into ones derived from a local label, and preserving/restoring the %ebp and %ebx as per the calling convention. Also move the #ifdef to the only existing call site. This makes it clear that the function call does nothing if support for memory encryption is not compiled in. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-12-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 9d1e1ccab2566..53f3f01f88af2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32) 1: /* Setup Exception handling for SEV-ES */ +#ifdef CONFIG_AMD_MEM_ENCRYPT call startup32_load_idt +#endif /* Make sure cpu supports long mode. */ call verify_cpu @@ -732,10 +734,8 @@ SYM_DATA_START(boot32_idt) .quad 0 .endr SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) -#endif -#ifdef CONFIG_AMD_MEM_ENCRYPT - __HEAD + .text .code32 /* * Write an IDT entry into boot32_idt @@ -768,24 +768,32 @@ SYM_FUNC_START_LOCAL(startup32_set_idt_entry) RET SYM_FUNC_END(startup32_set_idt_entry) -#endif SYM_FUNC_START(startup32_load_idt) -#ifdef CONFIG_AMD_MEM_ENCRYPT - leal rva(boot32_idt)(%ebp), %ecx + push %ebp + push %ebx + + call 1f +1: pop %ebp + + leal (boot32_idt - 1b)(%ebp), %ebx /* #VC handler */ - leal rva(startup32_vc_handler)(%ebp), %eax + leal (startup32_vc_handler - 1b)(%ebp), %eax movl $X86_TRAP_VC, %edx + movl %ebx, %ecx call startup32_set_idt_entry /* Load IDT */ - leal rva(boot32_idt)(%ebp), %eax - movl %eax, rva(boot32_idt_desc+2)(%ebp) - lidt rva(boot32_idt_desc)(%ebp) -#endif + leal (boot32_idt_desc - 1b)(%ebp), %ecx + movl %ebx, 2(%ecx) + lidt (%ecx) + + pop %ebx + pop %ebp RET SYM_FUNC_END(startup32_load_idt) +#endif /* * Check for the correct C-bit position when the startup_32 boot-path is used. @@ -804,6 +812,7 @@ SYM_FUNC_END(startup32_load_idt) * succeed. An incorrect C-bit position will map all memory unencrypted, so that * the compare will use the encrypted random data and fail. */ + __HEAD SYM_FUNC_START(startup32_check_sev_cbit) #ifdef CONFIG_AMD_MEM_ENCRYPT pushl %eax -- GitLab From 801873f1750aa1cc42e290d8a818e340fd7d0987 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:12 +0100 Subject: [PATCH 0591/2290] x86/boot/compressed: Move startup32_load_idt() out of head_64.S commit 9ea813be3d345dfb8ac5bf6fbb29e6a63647a39d upstream. Now that startup32_load_idt() has been refactored into an ordinary callable function, move it into mem-encrypt.S where it belongs. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-13-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 72 -------------------------- arch/x86/boot/compressed/mem_encrypt.S | 72 +++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 73 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 53f3f01f88af2..b1d00f862af91 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -723,78 +723,6 @@ SYM_DATA_START(boot_idt) .endr SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) -#ifdef CONFIG_AMD_MEM_ENCRYPT -SYM_DATA_START(boot32_idt_desc) - .word boot32_idt_end - boot32_idt - 1 - .long 0 -SYM_DATA_END(boot32_idt_desc) - .balign 8 -SYM_DATA_START(boot32_idt) - .rept 32 - .quad 0 - .endr -SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) - - .text - .code32 -/* - * Write an IDT entry into boot32_idt - * - * Parameters: - * - * %eax: Handler address - * %edx: Vector number - * %ecx: IDT address - */ -SYM_FUNC_START_LOCAL(startup32_set_idt_entry) - /* IDT entry address to %ecx */ - leal (%ecx, %edx, 8), %ecx - - /* Build IDT entry, lower 4 bytes */ - movl %eax, %edx - andl $0x0000ffff, %edx # Target code segment offset [15:0] - orl $(__KERNEL32_CS << 16), %edx # Target code segment selector - - /* Store lower 4 bytes to IDT */ - movl %edx, (%ecx) - - /* Build IDT entry, upper 4 bytes */ - movl %eax, %edx - andl $0xffff0000, %edx # Target code segment offset [31:16] - orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate - - /* Store upper 4 bytes to IDT */ - movl %edx, 4(%ecx) - - RET -SYM_FUNC_END(startup32_set_idt_entry) - -SYM_FUNC_START(startup32_load_idt) - push %ebp - push %ebx - - call 1f -1: pop %ebp - - leal (boot32_idt - 1b)(%ebp), %ebx - - /* #VC handler */ - leal (startup32_vc_handler - 1b)(%ebp), %eax - movl $X86_TRAP_VC, %edx - movl %ebx, %ecx - call startup32_set_idt_entry - - /* Load IDT */ - leal (boot32_idt_desc - 1b)(%ebp), %ecx - movl %ebx, 2(%ecx) - lidt (%ecx) - - pop %ebx - pop %ebp - RET -SYM_FUNC_END(startup32_load_idt) -#endif - /* * Check for the correct C-bit position when the startup_32 boot-path is used. * diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a73e4d783cae2..6747e5e4c6966 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -12,6 +12,8 @@ #include #include #include +#include +#include .text .code32 @@ -98,7 +100,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid) jmp 1b SYM_CODE_END(sev_es_req_cpuid) -SYM_CODE_START(startup32_vc_handler) +SYM_CODE_START_LOCAL(startup32_vc_handler) pushl %eax pushl %ebx pushl %ecx @@ -184,6 +186,63 @@ SYM_CODE_START(startup32_vc_handler) jmp .Lfail SYM_CODE_END(startup32_vc_handler) +/* + * Write an IDT entry into boot32_idt + * + * Parameters: + * + * %eax: Handler address + * %edx: Vector number + * %ecx: IDT address + */ +SYM_FUNC_START_LOCAL(startup32_set_idt_entry) + /* IDT entry address to %ecx */ + leal (%ecx, %edx, 8), %ecx + + /* Build IDT entry, lower 4 bytes */ + movl %eax, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + orl $(__KERNEL32_CS << 16), %edx # Target code segment selector + + /* Store lower 4 bytes to IDT */ + movl %edx, (%ecx) + + /* Build IDT entry, upper 4 bytes */ + movl %eax, %edx + andl $0xffff0000, %edx # Target code segment offset [31:16] + orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate + + /* Store upper 4 bytes to IDT */ + movl %edx, 4(%ecx) + + RET +SYM_FUNC_END(startup32_set_idt_entry) + +SYM_FUNC_START(startup32_load_idt) + push %ebp + push %ebx + + call 1f +1: pop %ebp + + leal (boot32_idt - 1b)(%ebp), %ebx + + /* #VC handler */ + leal (startup32_vc_handler - 1b)(%ebp), %eax + movl $X86_TRAP_VC, %edx + movl %ebx, %ecx + call startup32_set_idt_entry + + /* Load IDT */ + leal (boot32_idt_desc - 1b)(%ebp), %ecx + movl %ebx, 2(%ecx) + lidt (%ecx) + + pop %ebx + pop %ebp + RET +SYM_FUNC_END(startup32_load_idt) + .code64 #include "../../kernel/sev_verify_cbit.S" @@ -195,4 +254,15 @@ SYM_CODE_END(startup32_vc_handler) SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) SYM_DATA(sev_check_data, .quad 0) + +SYM_DATA_START_LOCAL(boot32_idt) + .rept 32 + .quad 0 + .endr +SYM_DATA_END(boot32_idt) + +SYM_DATA_START_LOCAL(boot32_idt_desc) + .word . - boot32_idt - 1 + .long 0 +SYM_DATA_END(boot32_idt_desc) #endif -- GitLab From e840ae3dc277f7f4ae38f600e7f5da7f169b8d7c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:13 +0100 Subject: [PATCH 0592/2290] x86/boot/compressed: Move startup32_check_sev_cbit() into .text commit b5d854cd4b6a314edd6c15dabc4233b84a0f8e5e upstream. Move startup32_check_sev_cbit() into the .text section and turn it into an ordinary function using the ordinary 32-bit calling convention, instead of saving/restoring the registers that are known to be live at the only call site. This improves maintainability, and makes it possible to move this function out of head_64.S and into a separate compilation unit that is specific to memory encryption. Note that this requires the call site to be moved before the mixed mode check, as %eax will be live otherwise. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-14-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 35 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b1d00f862af91..c7655a9dfd3f8 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -251,6 +251,11 @@ SYM_FUNC_START(startup_32) movl $__BOOT_TSS, %eax ltr %ax +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* Check if the C-bit position is correct when SEV is active */ + call startup32_check_sev_cbit +#endif + /* * Setup for the jump to 64bit mode * @@ -268,8 +273,6 @@ SYM_FUNC_START(startup_32) leal rva(startup_64_mixed_mode)(%ebp), %eax 1: #endif - /* Check if the C-bit position is correct when SEV is active */ - call startup32_check_sev_cbit pushl $__KERNEL_CS pushl %eax @@ -740,16 +743,17 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) * succeed. An incorrect C-bit position will map all memory unencrypted, so that * the compare will use the encrypted random data and fail. */ - __HEAD -SYM_FUNC_START(startup32_check_sev_cbit) #ifdef CONFIG_AMD_MEM_ENCRYPT - pushl %eax + .text +SYM_FUNC_START(startup32_check_sev_cbit) pushl %ebx - pushl %ecx - pushl %edx + pushl %ebp + + call 0f +0: popl %ebp /* Check for non-zero sev_status */ - movl rva(sev_status)(%ebp), %eax + movl (sev_status - 0b)(%ebp), %eax testl %eax, %eax jz 4f @@ -764,17 +768,18 @@ SYM_FUNC_START(startup32_check_sev_cbit) jnc 2b /* Store to memory and keep it in the registers */ - movl %eax, rva(sev_check_data)(%ebp) - movl %ebx, rva(sev_check_data+4)(%ebp) + leal (sev_check_data - 0b)(%ebp), %ebp + movl %eax, 0(%ebp) + movl %ebx, 4(%ebp) /* Enable paging to see if encryption is active */ movl %cr0, %edx /* Backup %cr0 in %edx */ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ movl %ecx, %cr0 - cmpl %eax, rva(sev_check_data)(%ebp) + cmpl %eax, 0(%ebp) jne 3f - cmpl %ebx, rva(sev_check_data+4)(%ebp) + cmpl %ebx, 4(%ebp) jne 3f movl %edx, %cr0 /* Restore previous %cr0 */ @@ -786,13 +791,11 @@ SYM_FUNC_START(startup32_check_sev_cbit) jmp 3b 4: - popl %edx - popl %ecx + popl %ebp popl %ebx - popl %eax -#endif RET SYM_FUNC_END(startup32_check_sev_cbit) +#endif /* * Stack and heap for uncompression -- GitLab From 0912dce9ed4e8a6442fb39627cd37ca5a25beec5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:14 +0100 Subject: [PATCH 0593/2290] x86/boot/compressed: Move startup32_check_sev_cbit() out of head_64.S commit 9d7eaae6a071ff1f718e0aa5e610bb712f8cc632 upstream. Now that the startup32_check_sev_cbit() routine can execute from anywhere and behaves like an ordinary function, it can be moved where it belongs. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-15-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 71 -------------------------- arch/x86/boot/compressed/mem_encrypt.S | 68 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 71 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c7655a9dfd3f8..43a82df0e9d63 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -726,77 +726,6 @@ SYM_DATA_START(boot_idt) .endr SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) -/* - * Check for the correct C-bit position when the startup_32 boot-path is used. - * - * The check makes use of the fact that all memory is encrypted when paging is - * disabled. The function creates 64 bits of random data using the RDRAND - * instruction. RDRAND is mandatory for SEV guests, so always available. If the - * hypervisor violates that the kernel will crash right here. - * - * The 64 bits of random data are stored to a memory location and at the same - * time kept in the %eax and %ebx registers. Since encryption is always active - * when paging is off the random data will be stored encrypted in main memory. - * - * Then paging is enabled. When the C-bit position is correct all memory is - * still mapped encrypted and comparing the register values with memory will - * succeed. An incorrect C-bit position will map all memory unencrypted, so that - * the compare will use the encrypted random data and fail. - */ -#ifdef CONFIG_AMD_MEM_ENCRYPT - .text -SYM_FUNC_START(startup32_check_sev_cbit) - pushl %ebx - pushl %ebp - - call 0f -0: popl %ebp - - /* Check for non-zero sev_status */ - movl (sev_status - 0b)(%ebp), %eax - testl %eax, %eax - jz 4f - - /* - * Get two 32-bit random values - Don't bail out if RDRAND fails - * because it is better to prevent forward progress if no random value - * can be gathered. - */ -1: rdrand %eax - jnc 1b -2: rdrand %ebx - jnc 2b - - /* Store to memory and keep it in the registers */ - leal (sev_check_data - 0b)(%ebp), %ebp - movl %eax, 0(%ebp) - movl %ebx, 4(%ebp) - - /* Enable paging to see if encryption is active */ - movl %cr0, %edx /* Backup %cr0 in %edx */ - movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ - movl %ecx, %cr0 - - cmpl %eax, 0(%ebp) - jne 3f - cmpl %ebx, 4(%ebp) - jne 3f - - movl %edx, %cr0 /* Restore previous %cr0 */ - - jmp 4f - -3: /* Check failed - hlt the machine */ - hlt - jmp 3b - -4: - popl %ebp - popl %ebx - RET -SYM_FUNC_END(startup32_check_sev_cbit) -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 6747e5e4c6966..14cf04a1ed091 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -243,6 +243,74 @@ SYM_FUNC_START(startup32_load_idt) RET SYM_FUNC_END(startup32_load_idt) +/* + * Check for the correct C-bit position when the startup_32 boot-path is used. + * + * The check makes use of the fact that all memory is encrypted when paging is + * disabled. The function creates 64 bits of random data using the RDRAND + * instruction. RDRAND is mandatory for SEV guests, so always available. If the + * hypervisor violates that the kernel will crash right here. + * + * The 64 bits of random data are stored to a memory location and at the same + * time kept in the %eax and %ebx registers. Since encryption is always active + * when paging is off the random data will be stored encrypted in main memory. + * + * Then paging is enabled. When the C-bit position is correct all memory is + * still mapped encrypted and comparing the register values with memory will + * succeed. An incorrect C-bit position will map all memory unencrypted, so that + * the compare will use the encrypted random data and fail. + */ +SYM_FUNC_START(startup32_check_sev_cbit) + pushl %ebx + pushl %ebp + + call 0f +0: popl %ebp + + /* Check for non-zero sev_status */ + movl (sev_status - 0b)(%ebp), %eax + testl %eax, %eax + jz 4f + + /* + * Get two 32-bit random values - Don't bail out if RDRAND fails + * because it is better to prevent forward progress if no random value + * can be gathered. + */ +1: rdrand %eax + jnc 1b +2: rdrand %ebx + jnc 2b + + /* Store to memory and keep it in the registers */ + leal (sev_check_data - 0b)(%ebp), %ebp + movl %eax, 0(%ebp) + movl %ebx, 4(%ebp) + + /* Enable paging to see if encryption is active */ + movl %cr0, %edx /* Backup %cr0 in %edx */ + movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ + movl %ecx, %cr0 + + cmpl %eax, 0(%ebp) + jne 3f + cmpl %ebx, 4(%ebp) + jne 3f + + movl %edx, %cr0 /* Restore previous %cr0 */ + + jmp 4f + +3: /* Check failed - hlt the machine */ + hlt + jmp 3b + +4: + popl %ebp + popl %ebx + RET +SYM_FUNC_END(startup32_check_sev_cbit) + .code64 #include "../../kernel/sev_verify_cbit.S" -- GitLab From cac22c9a5e661a000e734af797641375fa181dbc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:15 +0100 Subject: [PATCH 0594/2290] x86/boot/compressed: Adhere to calling convention in get_sev_encryption_bit() commit 30c9ca16a5271ba6f8ad9c86507ff1c789c94677 upstream. Make get_sev_encryption_bit() follow the ordinary i386 calling convention, and only call it if CONFIG_AMD_MEM_ENCRYPT is actually enabled. This clarifies the calling code, and makes it more maintainable. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-16-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 5 +++-- arch/x86/boot/compressed/mem_encrypt.S | 10 ---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 43a82df0e9d63..cd4eb22aa84b1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -180,12 +180,13 @@ SYM_FUNC_START(startup_32) */ /* * If SEV is active then set the encryption mask in the page tables. - * This will insure that when the kernel is copied and decompressed + * This will ensure that when the kernel is copied and decompressed * it will be done so encrypted. */ - call get_sev_encryption_bit xorl %edx, %edx #ifdef CONFIG_AMD_MEM_ENCRYPT + call get_sev_encryption_bit + xorl %edx, %edx testl %eax, %eax jz 1f subl $32, %eax /* Encryption bit is always above bit 31 */ diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 14cf04a1ed091..e69674588a31c 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -18,12 +18,7 @@ .text .code32 SYM_FUNC_START(get_sev_encryption_bit) - xor %eax, %eax - -#ifdef CONFIG_AMD_MEM_ENCRYPT push %ebx - push %ecx - push %edx movl $0x80000000, %eax /* CPUID to check the highest leaf */ cpuid @@ -54,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit) xor %eax, %eax .Lsev_exit: - pop %edx - pop %ecx pop %ebx - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - RET SYM_FUNC_END(get_sev_encryption_bit) -- GitLab From 71c43b714fd688ff5ee6d906e5cc38e6a8f2836f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:16 +0100 Subject: [PATCH 0595/2290] x86/boot/compressed: Only build mem_encrypt.S if AMD_MEM_ENCRYPT=y commit 61de13df95901bc58456bc5acdbd3c18c66cf859 upstream. Avoid building the mem_encrypt.o object if memory encryption support is not enabled to begin with. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-17-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/mem_encrypt.S | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 27c82c78ac260..0c9ebf74fac59 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -100,7 +100,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/ident_map_64.o vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o - vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index e69674588a31c..32f7cc8a86254 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -307,7 +307,6 @@ SYM_FUNC_END(startup32_check_sev_cbit) .data -#ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) @@ -323,4 +322,3 @@ SYM_DATA_START_LOCAL(boot32_idt_desc) .word . - boot32_idt - 1 .long 0 SYM_DATA_END(boot32_idt_desc) -#endif -- GitLab From a8901f331b8b7f95a7315d033a22bc84c8365f35 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Jan 2023 17:42:54 +0100 Subject: [PATCH 0596/2290] efi: verify that variable services are supported commit bad267f9e18f8e9e628abd1811d2899b1735a4e1 upstream. Current Qualcomm UEFI firmware does not implement the variable services but not all revisions clear the corresponding bits in the RT_PROP table services mask and instead the corresponding calls return EFI_UNSUPPORTED. This leads to efi core registering the generic efivar ops even when the variable services are not supported or when they are accessed through some other interface (e.g. Google SMI or the upcoming Qualcomm SCM implementation). Instead of playing games with init call levels to make sure that the custom implementations are registered after the generic one, make sure that get_next_variable() is actually supported before registering the generic ops. Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index b7c0e8cc0764f..9077353d1c98d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -185,8 +185,27 @@ static const struct attribute_group efi_subsys_attr_group = { static struct efivars generic_efivars; static struct efivar_operations generic_ops; +static bool generic_ops_supported(void) +{ + unsigned long name_size; + efi_status_t status; + efi_char16_t name; + efi_guid_t guid; + + name_size = sizeof(name); + + status = efi.get_next_variable(&name_size, &name, &guid); + if (status == EFI_UNSUPPORTED) + return false; + + return true; +} + static int generic_ops_register(void) { + if (!generic_ops_supported()) + return 0; + generic_ops.get_variable = efi.get_variable; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; @@ -200,6 +219,9 @@ static int generic_ops_register(void) static void generic_ops_unregister(void) { + if (!generic_ops.get_variable) + return; + efivars_unregister(&generic_efivars); } -- GitLab From 7bc9533e077e2553264b447189d13f83c47770a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 22 Nov 2022 17:10:17 +0100 Subject: [PATCH 0597/2290] x86/efi: Make the deprecated EFI handover protocol optional commit cc3fdda2876e58a7e83e558ab51853cf106afb6a upstream. The EFI handover protocol permits a bootloader to invoke the kernel as a EFI PE/COFF application, while passing a bootparams struct as a third argument to the entrypoint function call. This has no basis in the UEFI specification, and there are better ways to pass additional data to a UEFI application (UEFI configuration tables, UEFI variables, UEFI protocols) than going around the StartImage() boot service and jumping to a fixed offset in the loaded image, just to call a different function that takes a third parameter. The reason for handling struct bootparams in the bootloader was that the EFI stub could only load initrd images from the EFI system partition, and so passing it via struct bootparams was needed for loaders like GRUB, which pass the initrd in memory, and may load it from anywhere, including from the network. Another motivation was EFI mixed mode, which could not use the initrd loader in the EFI stub at all due to 32/64 bit incompatibilities (which will be fixed shortly [0]), and could not invoke the ordinary PE/COFF entry point either, for the same reasons. Given that loaders such as GRUB already carried the bootparams handling in order to implement non-EFI boot, retaining that code and just passing bootparams to the EFI stub was a reasonable choice (although defining an alternate entrypoint could have been avoided.) However, the GRUB side changes never made it upstream, and are only shipped by some of the distros in their downstream versions. In the meantime, EFI support has been added to other Linux architecture ports, as well as to U-boot and systemd, including arch-agnostic methods for passing initrd images in memory [1], and for doing mixed mode boot [2], none of them requiring anything like the EFI handover protocol. So given that only out-of-tree distro GRUB relies on this, let's permit it to be omitted from the build, in preparation for retiring it completely at a later date. (Note that systemd-boot does have an implementation as well, but only uses it as a fallback for booting images that do not implement the LoadFile2 based initrd loading method, i.e., v5.8 or older) [0] https://lore.kernel.org/all/20220927085842.2860715-1-ardb@kernel.org/ [1] ec93fc371f01 ("efi/libstub: Add support for loading the initrd from a device path") [2] 97aa276579b2 ("efi/x86: Add true mixed mode entry point into .compat section") Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20221122161017.2426828-18-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 17 +++++++++++++++++ arch/x86/boot/compressed/head_64.S | 4 +++- arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 2 ++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4c9bfc4be58d4..2f7af61b49b6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1982,6 +1982,23 @@ config EFI_STUB See Documentation/admin-guide/efi-stub.rst for more information. +config EFI_HANDOVER_PROTOCOL + bool "EFI handover protocol (DEPRECATED)" + depends on EFI_STUB + default y + help + Select this in order to include support for the deprecated EFI + handover protocol, which defines alternative entry points into the + EFI stub. This is a practice that has no basis in the UEFI + specification, and requires a priori knowledge on the part of the + bootloader about Linux/x86 specific ways of passing the command line + and initrd, and where in memory those assets may be loaded. + + If in doubt, say Y. Even though the corresponding support is not + present in upstream GRUB or other bootloaders, most distros build + GRUB with numerous downstream patches applied, and may rely on the + handover protocol as as result. + config EFI_MIXED bool "EFI mixed-mode support" depends on EFI_STUB && X86_64 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index cd4eb22aa84b1..96c61c9883c36 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -286,7 +286,7 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_MIXED +#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) .org 0x190 SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp /* Discard return address */ @@ -535,7 +535,9 @@ trampoline_return: SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL .org 0x390 +#endif SYM_FUNC_START(efi64_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f912d77701305..d31982509654d 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -406,7 +406,7 @@ xloadflags: # define XLF1 0 #endif -#ifdef CONFIG_EFI_STUB +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL # ifdef CONFIG_EFI_MIXED # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) # else diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a3725ad46c5a0..bd247692b7017 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -290,6 +290,7 @@ static void efi_stub_entry_update(void) { unsigned long addr = efi32_stub_entry; +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ addr = efi64_stub_entry - 0x200; @@ -298,6 +299,7 @@ static void efi_stub_entry_update(void) #ifdef CONFIG_EFI_MIXED if (efi32_stub_entry != addr) die("32-bit and 64-bit EFI entry points do not match\n"); +#endif #endif put_unaligned_le32(addr, &buf[0x264]); } -- GitLab From 4f3077c3eae7e68e2c0ba6d1bd3f5afeb61eb269 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 9 Jan 2023 18:04:02 +0100 Subject: [PATCH 0598/2290] x86/boot: Robustify calling startup_{32,64}() from the decompressor code commit 7734a0f31e99c433df3063bbb7e8ee5a16a2cb82 upstream. After commit ce697ccee1a8 ("kbuild: remove head-y syntax"), I started digging whether x86 is ready for removing this old cruft. Removing its objects from the list makes the kernel unbootable. This applies only to bzImage, vmlinux still works correctly. The reason is that with no strict object order determined by the linker arguments, not the linker script, startup_64 can be placed not right at the beginning of the kernel. Here's vmlinux.map's beginning before removing: ffffffff81000000 vmlinux.o:(.head.text) ffffffff81000000 startup_64 ffffffff81000070 secondary_startup_64 ffffffff81000075 secondary_startup_64_no_verify ffffffff81000160 verify_cpu and after: ffffffff81000000 vmlinux.o:(.head.text) ffffffff81000000 pvh_start_xen ffffffff81000080 startup_64 ffffffff810000f0 secondary_startup_64 ffffffff810000f5 secondary_startup_64_no_verify Not a problem itself, but the self-extractor code has the address of that function hardcoded the beginning, not looking onto the ELF header, which always contains the address of startup_{32,64}(). So, instead of doing an "act of blind faith", just take the address from the ELF header and extract a relative offset to the entry point. The decompressor function already returns a pointer to the beginning of the kernel to the Asm code, which then jumps to it, so add that offset to the return value. This doesn't change anything for now, but allows to resign from the "head object list" for x86 and makes sure valid Kbuild or any other improvements won't break anything here in general. Signed-off-by: Alexander Lobakin Signed-off-by: Ingo Molnar Tested-by: Jiri Slaby Cc: "H. Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20230109170403.4117105-2-alexandr.lobakin@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 2 +- arch/x86/boot/compressed/misc.c | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 6589ddd4cfaf2..987ae727cf9f0 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -187,7 +187,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) leal boot_heap@GOTOFF(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call extract_kernel /* returns kernel location in %eax */ + call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp /* diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 96c61c9883c36..3a970a388adb8 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -580,7 +580,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) movl input_len(%rip), %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movl output_len(%rip), %r9d /* decompressed length, end of relocs */ - call extract_kernel /* returns kernel location in %rax */ + call extract_kernel /* returns kernel entry point in %rax */ popq %rsi /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cf690d8712f4e..014ff222bf4b3 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len, { } #endif -static void parse_elf(void *output) +static size_t parse_elf(void *output) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -293,10 +293,8 @@ static void parse_elf(void *output) if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG2] != ELFMAG2 || - ehdr.e_ident[EI_MAG3] != ELFMAG3) { + ehdr.e_ident[EI_MAG3] != ELFMAG3) error("Kernel is not a valid ELF file"); - return; - } debug_putstr("Parsing ELF... "); @@ -328,6 +326,8 @@ static void parse_elf(void *output) } free(phdrs); + + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } /* @@ -356,6 +356,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; unsigned long needed_size; + size_t entry_offset; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -456,14 +457,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); - parse_elf(output); + entry_offset = parse_elf(output); handle_relocations(output, output_len, virt_addr); - debug_putstr("done.\nBooting the kernel.\n"); + + debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); + debug_puthex(entry_offset); + debug_putstr(").\n"); /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); - return output; + return output + entry_offset; } void fortify_panic(const char *name) -- GitLab From 51a0710218cea5c7d5528b92ba19e964423c7f5a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:00 +0200 Subject: [PATCH 0599/2290] x86/efistub: Branch straight to kernel entry point from C code commit d2d7a54f69b67cd0a30e0ebb5307cb2de625baac upstream. Instead of returning to the calling code in assembler that does nothing more than perform an indirect call with the boot_params pointer in register ESI/RSI, perform the jump directly from the EFI stub C code. This will allow the asm entrypoint code to be dropped entirely in subsequent patches. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-4-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9ae0d6d0c285f..9422fddfbc8f1 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -279,7 +279,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) #define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) #define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) -void startup_32(struct boot_params *boot_params); +extern const u8 startup_32[], startup_64[]; static void setup_memory_protection(unsigned long image_base, unsigned long image_size) @@ -760,10 +760,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static void __noreturn enter_kernel(unsigned long kernel_addr, + struct boot_params *boot_params) +{ + /* enter decompressed kernel with boot_params pointer in RSI/ESI */ + asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params)); + + unreachable(); +} + /* - * On success, we return the address of startup_32, which has potentially been - * relocated by efi_relocate_kernel. - * On failure, we exit to the firmware via efi_exit instead of returning. + * On success, this routine will jump to the relocated image directly and never + * return. On failure, it will exit to the firmware via efi_exit() instead of + * returning. */ asmlinkage unsigned long efi_main(efi_handle_t handle, efi_system_table_t *sys_table_arg, @@ -905,7 +914,10 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, goto fail; } - return bzimage_addr; + if (IS_ENABLED(CONFIG_X86_64)) + bzimage_addr += startup_64 - startup_32; + + enter_kernel(bzimage_addr, boot_params); fail: efi_err("efi_main() failed!\n"); -- GitLab From 2cca5f519e3a967f4b5b72e69758521401f021eb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:04 +0200 Subject: [PATCH 0600/2290] x86/decompressor: Store boot_params pointer in callee save register commit 8b63cba746f86a754d66e302c43209cc9b9b6e39 upstream. Instead of pushing and popping %RSI several times to preserve the struct boot_params pointer across the execution of the startup code, move it into a callee save register before the first call into C, and copy it back when needed. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-8-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 42 ++++++++++++------------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 3a970a388adb8..bb268d165906b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -408,10 +408,14 @@ SYM_CODE_START(startup_64) lretq .Lon_kernel_cs: + /* + * RSI holds a pointer to a boot_params structure provided by the + * loader, and this needs to be preserved across C function calls. So + * move it into a callee saved register. + */ + movq %rsi, %r15 - pushq %rsi call load_stage1_idt - popq %rsi #ifdef CONFIG_AMD_MEM_ENCRYPT /* @@ -422,12 +426,10 @@ SYM_CODE_START(startup_64) * CPUID instructions being issued, so go ahead and do that now via * sev_enable(), which will also handle the rest of the SEV-related * detection/setup to ensure that has been done in advance of any dependent - * code. + * code. Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call sev_enable - popq %rsi #endif /* @@ -440,13 +442,10 @@ SYM_CODE_START(startup_64) * - Non zero RDX means trampoline needs to enable 5-level * paging. * - * RSI holds real mode data and needs to be preserved across - * this function call. + * Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call paging_prepare - popq %rsi /* Save the trampoline address in RCX */ movq %rax, %rcx @@ -459,9 +458,9 @@ SYM_CODE_START(startup_64) * because the architecture does not guarantee that GPRs will retain * their full 64-bit values across a 32-bit mode switch. */ + pushq %r15 pushq %rbp pushq %rbx - pushq %rsi /* * Push the 64-bit address of trampoline_return() onto the new stack. @@ -478,9 +477,9 @@ SYM_CODE_START(startup_64) lretq trampoline_return: /* Restore live 64-bit registers */ - popq %rsi popq %rbx popq %rbp + popq %r15 /* Restore the stack, the 32-bit trampoline uses its own stack */ leaq rva(boot_stack_end)(%rbx), %rsp @@ -490,14 +489,9 @@ trampoline_return: * * RDI is address of the page table to use instead of page table * in trampoline memory (if required). - * - * RSI holds real mode data and needs to be preserved across - * this function call. */ - pushq %rsi leaq rva(top_pgtable)(%rbx), %rdi call cleanup_trampoline - popq %rsi /* Zero EFLAGS */ pushq $0 @@ -507,7 +501,6 @@ trampoline_return: * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushq %rsi leaq (_bss-8)(%rip), %rsi leaq rva(_bss-8)(%rbx), %rdi movl $(_bss - startup_32), %ecx @@ -515,7 +508,6 @@ trampoline_return: std rep movsq cld - popq %rsi /* * The GDT may get overwritten either during the copy we just did or @@ -562,30 +554,28 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq - pushq %rsi call load_stage2_idt /* Pass boot_params to initialize_identity_maps() */ - movq (%rsp), %rdi + movq %r15, %rdi call initialize_identity_maps - popq %rsi /* * Do the extraction, and jump to the new kernel.. */ - pushq %rsi /* Save the real mode argument */ - movq %rsi, %rdi /* real mode address */ + /* pass struct boot_params pointer */ + movq %r15, %rdi leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ leaq input_data(%rip), %rdx /* input_data */ movl input_len(%rip), %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movl output_len(%rip), %r9d /* decompressed length, end of relocs */ call extract_kernel /* returns kernel entry point in %rax */ - popq %rsi /* * Jump to the decompressed kernel. */ + movq %r15, %rsi jmp *%rax SYM_FUNC_END(.Lrelocated) -- GitLab From 99a20f58913a4093c73817f5b364f0cf050a6d75 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:05 +0200 Subject: [PATCH 0601/2290] x86/decompressor: Assign paging related global variables earlier commit 00c6b0978ec182f1a672095930872168b9d5b1e2 upstream. There is no need to defer the assignment of the paging related global variables 'pgdir_shift' and 'ptrs_per_p4d' until after the trampoline is cleaned up, so assign them as soon as it is clear that 5-level paging will be enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-9-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.h | 2 -- arch/x86/boot/compressed/pgtable_64.c | 14 +++++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index a49d9219c06e5..b6e46435b90b8 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -170,9 +170,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } #endif /* ident_map_64.c */ -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; -#endif extern void kernel_add_identity_map(unsigned long start, unsigned long end); /* Used by PAGE_KERN* macros: */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 2ac12ff4111bf..f8092d3244c95 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -130,6 +130,11 @@ struct paging_config paging_prepare(void *rmode) native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { paging_config.l5_required = 1; + + /* Initialize variables for 5-level paging */ + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; } paging_config.trampoline_start = find_trampoline_placement(); @@ -206,13 +211,4 @@ void cleanup_trampoline(void *pgtable) /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); - - /* Initialize variables for 5-level paging */ -#ifdef CONFIG_X86_5LEVEL - if (__read_cr4() & X86_CR4_LA57) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - } -#endif } -- GitLab From 640f27fc2e7bd69d511675c0c62a90bd9ed977cb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:06 +0200 Subject: [PATCH 0602/2290] x86/decompressor: Call trampoline as a normal function commit e8972a76aa90c05a0078043413f806c02fcb3487 upstream. Move the long return to switch to 32-bit mode into the trampoline code so it can be called as an ordinary function. This will allow it to be called directly from C code in a subsequent patch. While at it, reorganize the code somewhat to keep the prologue and epilogue of the function together, making the code a bit easier to follow. Also, given that the trampoline is now entered in 64-bit mode, a simple RIP-relative reference can be used to take the address of the exit point. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-10-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 79 +++++++++++++----------------- arch/x86/boot/compressed/pgtable.h | 2 +- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index bb268d165906b..381ca6bf62d35 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -450,39 +450,8 @@ SYM_CODE_START(startup_64) /* Save the trampoline address in RCX */ movq %rax, %rcx - /* Set up 32-bit addressable stack */ - leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp - - /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. - */ - pushq %r15 - pushq %rbp - pushq %rbx - - /* - * Push the 64-bit address of trampoline_return() onto the new stack. - * It will be used by the trampoline to return to the main code. Due to - * the 32-bit mode switch, it cannot be kept it in a register either. - */ - leaq trampoline_return(%rip), %rdi - pushq %rdi - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - pushq %rax - lretq -trampoline_return: - /* Restore live 64-bit registers */ - popq %rbx - popq %rbp - popq %r15 - - /* Restore the stack, the 32-bit trampoline uses its own stack */ - leaq rva(boot_stack_end)(%rbx), %rsp + call *%rax /* * cleanup_trampoline() would restore trampoline memory. @@ -579,7 +548,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) jmp *%rax SYM_FUNC_END(.Lrelocated) - .code32 /* * This is the 32-bit trampoline that will be copied over to low memory. * @@ -588,6 +556,39 @@ SYM_FUNC_END(.Lrelocated) * Non zero RDX means trampoline needs to enable 5-level paging. */ SYM_CODE_START(trampoline_32bit_src) + /* + * Preserve live 64-bit registers on the stack: this is necessary + * because the architecture does not guarantee that GPRs will retain + * their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %rbp + pushq %rbx + + /* Set up 32-bit addressable stack and push the old RSP value */ + leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx + movq %rsp, (%rbx) + movq %rbx, %rsp + + /* Take the address of the trampoline exit code */ + leaq .Lret(%rip), %rbx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq + +.Lret: + /* Restore the preserved 64-bit registers */ + movq (%rsp), %rsp + popq %rbx + popq %rbp + popq %r15 + retq + + .code32 +0: /* Set up data and stack segments */ movl $__KERNEL_DS, %eax movl %eax, %ds @@ -651,12 +652,9 @@ SYM_CODE_START(trampoline_32bit_src) 1: movl %eax, %cr4 - /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - /* Prepare the stack for far return to Long Mode */ pushl $__KERNEL_CS - pushl %eax + pushl %ebx /* Enable paging again. */ movl %cr0, %eax @@ -666,12 +664,6 @@ SYM_CODE_START(trampoline_32bit_src) lret SYM_CODE_END(trampoline_32bit_src) - .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) - /* Return from the trampoline */ - retq -SYM_FUNC_END(.Lpaging_enabled) - /* * The trampoline code has a size limit. * Make sure we fail to compile if the trampoline code grows @@ -679,7 +671,6 @@ SYM_FUNC_END(.Lpaging_enabled) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - .code32 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index cc9b2529a0863..91dbb99203fbc 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -6,7 +6,7 @@ #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE -- GitLab From 6083b4c5908e0e6d1b578af04103f64c257ffb82 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:07 +0200 Subject: [PATCH 0603/2290] x86/decompressor: Use standard calling convention for trampoline commit 918a7a04e71745e99a0efc6753e587439b794b29 upstream. Update the trampoline code so its arguments are passed via RDI and RSI, which matches the ordinary SysV calling convention for x86_64. This will allow this code to be called directly from C. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-11-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 27 +++++++++++++-------------- arch/x86/boot/compressed/pgtable.h | 2 +- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 381ca6bf62d35..c28f7ef80ad27 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -447,9 +447,9 @@ SYM_CODE_START(startup_64) movq %r15, %rdi call paging_prepare - /* Save the trampoline address in RCX */ - movq %rax, %rcx - + /* Pass the trampoline address and boolean flag as args #1 and #2 */ + movq %rax, %rdi + movq %rdx, %rsi leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax call *%rax @@ -549,11 +549,14 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) SYM_FUNC_END(.Lrelocated) /* - * This is the 32-bit trampoline that will be copied over to low memory. + * This is the 32-bit trampoline that will be copied over to low memory. It + * will be called using the ordinary 64-bit calling convention from code + * running in 64-bit mode. * * Return address is at the top of the stack (might be above 4G). - * ECX contains the base address of the trampoline memory. - * Non zero RDX means trampoline needs to enable 5-level paging. + * The first argument (EDI) contains the 32-bit addressable base of the + * trampoline memory. A non-zero second argument (ESI) means that the + * trampoline needs to enable 5-level paging. */ SYM_CODE_START(trampoline_32bit_src) /* @@ -600,7 +603,7 @@ SYM_CODE_START(trampoline_32bit_src) movl %eax, %cr0 /* Check what paging mode we want to be in after the trampoline */ - testl %edx, %edx + testl %esi, %esi jz 1f /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ @@ -615,21 +618,17 @@ SYM_CODE_START(trampoline_32bit_src) jz 3f 2: /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax + leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax movl %eax, %cr3 3: /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ - pushl %ecx - pushl %edx movl $MSR_EFER, %ecx rdmsr btsl $_EFER_LME, %eax /* Avoid writing EFER if no change was made (for TDX guest) */ jc 1f wrmsr -1: popl %edx - popl %ecx - +1: #ifdef CONFIG_X86_MCE /* * Preserve CR4.MCE if the kernel will enable #MC support. @@ -646,7 +645,7 @@ SYM_CODE_START(trampoline_32bit_src) /* Enable PAE and LA57 (if required) paging modes */ orl $X86_CR4_PAE, %eax - testl %edx, %edx + testl %esi, %esi jz 1f orl $X86_CR4_LA57, %eax 1: diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 91dbb99203fbc..4e8cef135226b 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -14,7 +14,7 @@ extern unsigned long *trampoline_32bit; -extern void trampoline_32bit_src(void *return_ptr); +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ -- GitLab From 1523291591de054393ff4d732f18abd222ff5949 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:08 +0200 Subject: [PATCH 0604/2290] x86/decompressor: Avoid the need for a stack in the 32-bit trampoline commit bd328aa01ff77a45aeffea5fc4521854291db11f upstream. The 32-bit trampoline no longer uses the stack for anything except performing a far return back to long mode, and preserving the caller's stack pointer value. Currently, the trampoline stack is placed in the same page that carries the trampoline code, which means this page must be mapped writable and executable, and the stack is therefore executable as well. Replace the far return with a far jump, so that the return address can be pre-calculated and patched into the code before it is called. This removes the need for a 32-bit addressable stack entirely, and in a later patch, this will be taken advantage of by removing writable permissions from (and adding executable permissions to) the trampoline code page when booting via the EFI stub. Note that the value of RSP still needs to be preserved explicitly across the switch into 32-bit mode, as the register may get truncated to 32 bits. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-12-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 45 ++++++++++++++++----------- arch/x86/boot/compressed/pgtable.h | 4 +-- arch/x86/boot/compressed/pgtable_64.c | 12 ++++++- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c28f7ef80ad27..40848b817ac5b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -558,6 +558,7 @@ SYM_FUNC_END(.Lrelocated) * trampoline memory. A non-zero second argument (ESI) means that the * trampoline needs to enable 5-level paging. */ + .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) /* * Preserve live 64-bit registers on the stack: this is necessary @@ -568,13 +569,9 @@ SYM_CODE_START(trampoline_32bit_src) pushq %rbp pushq %rbx - /* Set up 32-bit addressable stack and push the old RSP value */ - leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx - movq %rsp, (%rbx) - movq %rbx, %rsp - - /* Take the address of the trampoline exit code */ - leaq .Lret(%rip), %rbx + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ + movq %rsp, %rbx + shrq $32, %rbx /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ pushq $__KERNEL32_CS @@ -582,9 +579,17 @@ SYM_CODE_START(trampoline_32bit_src) pushq %rax lretq + /* + * The 32-bit code below will do a far jump back to long mode and end + * up here after reconfiguring the number of paging levels. First, the + * stack pointer needs to be restored to its full 64-bit value before + * the callee save register contents can be popped from the stack. + */ .Lret: + shlq $32, %rbx + orq %rbx, %rsp + /* Restore the preserved 64-bit registers */ - movq (%rsp), %rsp popq %rbx popq %rbp popq %r15 @@ -592,11 +597,6 @@ SYM_CODE_START(trampoline_32bit_src) .code32 0: - /* Set up data and stack segments */ - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %ss - /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -651,18 +651,26 @@ SYM_CODE_START(trampoline_32bit_src) 1: movl %eax, %cr4 - /* Prepare the stack for far return to Long Mode */ - pushl $__KERNEL_CS - pushl %ebx - /* Enable paging again. */ movl %cr0, %eax btsl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - lret + /* + * Return to the 64-bit calling code using LJMP rather than LRET, to + * avoid the need for a 32-bit addressable stack. The destination + * address will be adjusted after the template code is copied into a + * 32-bit addressable buffer. + */ +.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) SYM_CODE_END(trampoline_32bit_src) +/* + * This symbol is placed right after trampoline_32bit_src() so its address can + * be used to infer the size of the trampoline code. + */ +SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) + /* * The trampoline code has a size limit. * Make sure we fail to compile if the trampoline code grows @@ -670,6 +678,7 @@ SYM_CODE_END(trampoline_32bit_src) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE + .text SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 4e8cef135226b..c6b0903aded05 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -8,13 +8,13 @@ #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE #define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 -#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE - #ifndef __ASSEMBLER__ extern unsigned long *trampoline_32bit; extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); +extern const u16 trampoline_ljmp_imm_offset; + #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index f8092d3244c95..5198a05aefa8d 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -109,6 +109,7 @@ static unsigned long find_trampoline_placement(void) struct paging_config paging_prepare(void *rmode) { struct paging_config paging_config = {}; + void *tramp_code; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ boot_params = rmode; @@ -148,9 +149,18 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + tramp_code = memcpy(trampoline_32bit + + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + /* + * Avoid the need for a stack in the 32-bit trampoline code, by using + * LJMP rather than LRET to return back to long mode. LJMP takes an + * immediate absolute address, which needs to be adjusted based on the + * placement of the trampoline. + */ + *(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code; + /* * The code below prepares page table in trampoline memory. * -- GitLab From 364d7745974f20ed940918e3129d10c271638153 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:09 +0200 Subject: [PATCH 0605/2290] x86/decompressor: Call trampoline directly from C code commit 64ef578b6b6866bec012544416946533444036c8 upstream. Instead of returning to the asm calling code to invoke the trampoline, call it straight from the C code that sets it up. That way, the struct return type is no longer needed for returning two values, and the call can be made conditional more cleanly in a subsequent patch. This means that all callee save 64-bit registers need to be preserved and restored, as their contents may not survive the legacy mode switch. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-13-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 31 +++++++++++--------------- arch/x86/boot/compressed/pgtable_64.c | 32 +++++++++++---------------- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 40848b817ac5b..7d0cf029c8246 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -433,25 +433,14 @@ SYM_CODE_START(startup_64) #endif /* - * paging_prepare() sets up the trampoline and checks if we need to - * enable 5-level paging. - * - * paging_prepare() returns a two-quadword structure which lands - * into RDX:RAX: - * - Address of the trampoline is returned in RAX. - * - Non zero RDX means trampoline needs to enable 5-level - * paging. + * configure_5level_paging() updates the number of paging levels using + * a trampoline in 32-bit addressable memory if the current number does + * not match the desired number. * * Pass the boot_params pointer as the first argument. */ movq %r15, %rdi - call paging_prepare - - /* Pass the trampoline address and boolean flag as args #1 and #2 */ - movq %rax, %rdi - movq %rdx, %rsi - leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - call *%rax + call configure_5level_paging /* * cleanup_trampoline() would restore trampoline memory. @@ -561,11 +550,14 @@ SYM_FUNC_END(.Lrelocated) .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. + * Preserve callee save 64-bit registers on the stack: this is + * necessary because the architecture does not guarantee that GPRs will + * retain their full 64-bit values across a 32-bit mode switch. */ pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 pushq %rbp pushq %rbx @@ -592,6 +584,9 @@ SYM_CODE_START(trampoline_32bit_src) /* Restore the preserved 64-bit registers */ popq %rbx popq %rbp + popq %r12 + popq %r13 + popq %r14 popq %r15 retq diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 5198a05aefa8d..f9cc86b2ee55c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39; unsigned int __section(".data") ptrs_per_p4d = 1; #endif -struct paging_config { - unsigned long trampoline_start; - unsigned long l5_required; -}; - /* Buffer to preserve trampoline memory */ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; @@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; * purposes. * * Avoid putting the pointer into .bss as it will be cleared between - * paging_prepare() and extract_kernel(). + * configure_5level_paging() and extract_kernel(). */ unsigned long *trampoline_32bit __section(".data"); @@ -106,13 +101,13 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -struct paging_config paging_prepare(void *rmode) +asmlinkage void configure_5level_paging(struct boot_params *bp) { - struct paging_config paging_config = {}; - void *tramp_code; + void (*toggle_la57)(void *trampoline, bool enable_5lvl); + bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = rmode; + boot_params = bp; /* * Check if LA57 is desired and supported. @@ -130,7 +125,7 @@ struct paging_config paging_prepare(void *rmode) !cmdline_find_option_bool("no5lvl") && native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { - paging_config.l5_required = 1; + l5_required = true; /* Initialize variables for 5-level paging */ __pgtable_l5_enabled = 1; @@ -138,9 +133,7 @@ struct paging_config paging_prepare(void *rmode) ptrs_per_p4d = 512; } - paging_config.trampoline_start = find_trampoline_placement(); - - trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); @@ -149,7 +142,7 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - tramp_code = memcpy(trampoline_32bit + + toggle_la57 = memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); @@ -159,7 +152,8 @@ struct paging_config paging_prepare(void *rmode) * immediate absolute address, which needs to be adjusted based on the * placement of the trampoline. */ - *(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code; + *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) += + (unsigned long)toggle_la57; /* * The code below prepares page table in trampoline memory. @@ -175,10 +169,10 @@ struct paging_config paging_prepare(void *rmode) * We are not going to use the page table in trampoline memory if we * are already in the desired paging mode. */ - if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) goto out; - if (paging_config.l5_required) { + if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. @@ -201,7 +195,7 @@ struct paging_config paging_prepare(void *rmode) } out: - return paging_config; + toggle_la57(trampoline_32bit, l5_required); } void cleanup_trampoline(void *pgtable) -- GitLab From e2fa53a04cc722aaaedbd91cd414d170067fb09d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:10 +0200 Subject: [PATCH 0606/2290] x86/decompressor: Only call the trampoline when changing paging levels commit f97b67a773cd84bd8b55c0a0ec32448a87fc56bb upstream. Since the current and desired number of paging levels are known when the trampoline is being prepared, avoid calling the trampoline at all if it is clear that calling it is not going to result in a change to the number of paging levels. Given that the CPU is already running in long mode, the PAE and LA57 settings are necessarily consistent with the currently active page tables, and other fields in CR4 will be initialized by the startup code in the kernel proper. So limit the manipulation of CR4 to toggling the LA57 bit, which is the only thing that really needs doing at this point in the boot. This also means that there is no need to pass the value of l5_required to toggle_la57(), as it will not be called unless CR4.LA57 needs to toggle. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-14-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 45 +++------------------------ arch/x86/boot/compressed/pgtable_64.c | 22 ++++++------- 2 files changed, 13 insertions(+), 54 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 7d0cf029c8246..4645d7b66b1f1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -390,10 +390,6 @@ SYM_CODE_START(startup_64) * For the trampoline, we need the top page table to reside in lower * memory as we don't have a way to load 64-bit values into CR3 in * 32-bit mode. - * - * We go though the trampoline even if we don't have to: if we're - * already in a desired paging mode. This way the trampoline code gets - * tested on every boot. */ /* Make sure we have GDT with 32-bit code segment */ @@ -544,8 +540,7 @@ SYM_FUNC_END(.Lrelocated) * * Return address is at the top of the stack (might be above 4G). * The first argument (EDI) contains the 32-bit addressable base of the - * trampoline memory. A non-zero second argument (ESI) means that the - * trampoline needs to enable 5-level paging. + * trampoline memory. */ .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) @@ -597,25 +592,10 @@ SYM_CODE_START(trampoline_32bit_src) btrl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - /* Check what paging mode we want to be in after the trampoline */ - testl %esi, %esi - jz 1f - - /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jnz 3f - jmp 2f -1: - /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jz 3f -2: /* Point CR3 to the trampoline's new top level page table */ leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax movl %eax, %cr3 -3: + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ movl $MSR_EFER, %ecx rdmsr @@ -624,26 +604,9 @@ SYM_CODE_START(trampoline_32bit_src) jc 1f wrmsr 1: -#ifdef CONFIG_X86_MCE - /* - * Preserve CR4.MCE if the kernel will enable #MC support. - * Clearing MCE may fault in some environments (that also force #MC - * support). Any machine check that occurs before #MC support is fully - * configured will crash the system regardless of the CR4.MCE value set - * here. - */ + /* Toggle CR4.LA57 */ movl %cr4, %eax - andl $X86_CR4_MCE, %eax -#else - movl $0, %eax -#endif - - /* Enable PAE and LA57 (if required) paging modes */ - orl $X86_CR4_PAE, %eax - testl %esi, %esi - jz 1f - orl $X86_CR4_LA57, %eax -1: + btcl $X86_CR4_LA57_BIT, %eax movl %eax, %cr4 /* Enable paging again. */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index f9cc86b2ee55c..4213473ae5488 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -103,7 +103,7 @@ static unsigned long find_trampoline_placement(void) asmlinkage void configure_5level_paging(struct boot_params *bp) { - void (*toggle_la57)(void *trampoline, bool enable_5lvl); + void (*toggle_la57)(void *trampoline); bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ @@ -133,6 +133,13 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) ptrs_per_p4d = 512; } + /* + * The trampoline will not be used if the paging mode is already set to + * the desired one. + */ + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + return; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ @@ -160,18 +167,8 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) * * The new page table will be used by trampoline code for switching * from 4- to 5-level paging or vice versa. - * - * If switching is not required, the page table is unused: trampoline - * code wouldn't touch CR3. */ - /* - * We are not going to use the page table in trampoline memory if we - * are already in the desired paging mode. - */ - if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) - goto out; - if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as @@ -194,8 +191,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) (void *)src, PAGE_SIZE); } -out: - toggle_la57(trampoline_32bit, l5_required); + toggle_la57(trampoline_32bit); } void cleanup_trampoline(void *pgtable) -- GitLab From df3dec320b7c14780484e824f3ec9c213e4996e1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:11 +0200 Subject: [PATCH 0607/2290] x86/decompressor: Pass pgtable address to trampoline directly commit cb83cece57e1889109dd73ea08ee338668c9d1b8 upstream. The only remaining use of the trampoline address by the trampoline itself is deriving the page table address from it, and this involves adding an offset of 0x0. So simplify this, and pass the new CR3 value directly. This makes the fact that the page table happens to be at the start of the trampoline allocation an implementation detail of the caller. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-15-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/boot/compressed/pgtable.h | 2 -- arch/x86/boot/compressed/pgtable_64.c | 9 ++++----- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4645d7b66b1f1..148b349c193eb 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -539,8 +539,9 @@ SYM_FUNC_END(.Lrelocated) * running in 64-bit mode. * * Return address is at the top of the stack (might be above 4G). - * The first argument (EDI) contains the 32-bit addressable base of the - * trampoline memory. + * The first argument (EDI) contains the address of the temporary PGD level + * page table in 32-bit addressable memory which will be programmed into + * register CR3. */ .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) @@ -593,8 +594,7 @@ SYM_CODE_START(trampoline_32bit_src) movl %eax, %cr0 /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax - movl %eax, %cr3 + movl %edi, %cr3 /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ movl $MSR_EFER, %ecx diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index c6b0903aded05..6d595abe06b34 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -3,8 +3,6 @@ #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) -#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 - #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE #define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 4213473ae5488..eab4e6b568ae0 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -103,7 +103,7 @@ static unsigned long find_trampoline_placement(void) asmlinkage void configure_5level_paging(struct boot_params *bp) { - void (*toggle_la57)(void *trampoline); + void (*toggle_la57)(void *cr3); bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ @@ -174,7 +174,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. */ - trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; } else { unsigned long src; @@ -187,8 +187,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) * may be above 4G. */ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), - (void *)src, PAGE_SIZE); + memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); } toggle_la57(trampoline_32bit); @@ -198,7 +197,7 @@ void cleanup_trampoline(void *pgtable) { void *trampoline_pgtable; - trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); + trampoline_pgtable = trampoline_32bit; /* * Move the top level page table out of trampoline memory, -- GitLab From 463b51e90c576cd63269f8420c0a0b09152092e5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:12 +0200 Subject: [PATCH 0608/2290] x86/decompressor: Merge trampoline cleanup with switching code commit 03dda95137d3247564854ad9032c0354273a159d upstream. Now that the trampoline setup code and the actual invocation of it are all done from the C routine, the trampoline cleanup can be merged into it as well, instead of returning to asm just to call another C function. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20230807162720.545787-16-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 14 ++++---------- arch/x86/boot/compressed/pgtable_64.c | 18 ++++-------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 148b349c193eb..81458f77131b2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -433,20 +433,14 @@ SYM_CODE_START(startup_64) * a trampoline in 32-bit addressable memory if the current number does * not match the desired number. * - * Pass the boot_params pointer as the first argument. + * Pass the boot_params pointer as the first argument. The second + * argument is the relocated address of the page table to use instead + * of the page table in trampoline memory (if required). */ movq %r15, %rdi + leaq rva(top_pgtable)(%rbx), %rsi call configure_5level_paging - /* - * cleanup_trampoline() would restore trampoline memory. - * - * RDI is address of the page table to use instead of page table - * in trampoline memory (if required). - */ - leaq rva(top_pgtable)(%rbx), %rdi - call cleanup_trampoline - /* Zero EFLAGS */ pushq $0 popfq diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index eab4e6b568ae0..7939eb6e6ce9b 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -101,7 +101,7 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -asmlinkage void configure_5level_paging(struct boot_params *bp) +asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) { void (*toggle_la57)(void *cr3); bool l5_required = false; @@ -191,22 +191,12 @@ asmlinkage void configure_5level_paging(struct boot_params *bp) } toggle_la57(trampoline_32bit); -} - -void cleanup_trampoline(void *pgtable) -{ - void *trampoline_pgtable; - - trampoline_pgtable = trampoline_32bit; /* - * Move the top level page table out of trampoline memory, - * if it's there. + * Move the top level page table out of trampoline memory. */ - if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)pgtable); - } + memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); -- GitLab From 5c4feadb0011983bbc4587bc61056c7b379d9969 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:16 +0200 Subject: [PATCH 0609/2290] x86/decompressor: Move global symbol references to C code commit 24388292e2d7fae79a0d4183cc91716b851299cf upstream. It is no longer necessary to be cautious when referring to global variables in the position independent decompressor code, now that it is built using PIE codegen and makes an assertion in the linker script that no GOT entries exist (which would require adjustment for the actual runtime load address of the decompressor binary). This means global variables can be referenced directly from C code, instead of having to pass their runtime addresses into C routines from asm code, which needs to happen at each call site. Do so for the code that will be called directly from the EFI stub after a subsequent patch, and avoid the need to duplicate this logic a third time. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-20-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_32.S | 8 -------- arch/x86/boot/compressed/head_64.S | 10 ++-------- arch/x86/boot/compressed/misc.c | 16 +++++++++------- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 987ae727cf9f0..3ecc1bbe971e1 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -179,13 +179,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) */ /* push arguments for extract_kernel: */ - pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ pushl %ebp /* output address */ - pushl input_len@GOTOFF(%ebx) /* input_len */ - leal input_data@GOTOFF(%ebx), %eax - pushl %eax /* input_data */ - leal boot_heap@GOTOFF(%ebx), %eax - pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp @@ -213,8 +207,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) */ .bss .balign 4 -boot_heap: - .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 81458f77131b2..fafd0a59f3961 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -511,13 +511,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* * Do the extraction, and jump to the new kernel.. */ - /* pass struct boot_params pointer */ + /* pass struct boot_params pointer and output target address */ movq %r15, %rdi - leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ - leaq input_data(%rip), %rdx /* input_data */ - movl input_len(%rip), %ecx /* input_len */ - movq %rbp, %r8 /* output target address */ - movl output_len(%rip), %r9d /* decompressed length, end of relocs */ + movq %rbp, %rsi call extract_kernel /* returns kernel entry point in %rax */ /* @@ -675,8 +671,6 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) */ .bss .balign 4 -SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) - SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 .balign 16 diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 014ff222bf4b3..e4e3e49fcc374 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,11 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); + +extern unsigned char input_data[]; +extern unsigned int input_len, output_len; + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -347,14 +352,11 @@ static size_t parse_elf(void *output) * |-------uncompressed kernel image---------| * */ -asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, - unsigned char *input_data, - unsigned long input_len, - unsigned char *output, - unsigned long output_len) +asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + memptr heap = (memptr)boot_heap; unsigned long needed_size; size_t entry_offset; @@ -412,7 +414,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entries. This ensures the full mapped area is usable RAM * and doesn't include any reserved areas. */ - needed_size = max(output_len, kernel_total_size); + needed_size = max_t(unsigned long, output_len, kernel_total_size); #ifdef CONFIG_X86_64 needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); #endif @@ -443,7 +445,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); - if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) -- GitLab From bf0ca988e250af95824c121873b2f76fccfc91df Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 7 Aug 2023 18:27:15 +0200 Subject: [PATCH 0610/2290] decompress: Use 8 byte alignment commit 8217ad0a435ff06d651d7298ea8ae8d72388179e upstream. The ZSTD decompressor requires malloc() allocations to be 8 byte aligned, so ensure that this the case. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-19-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/decompress/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 9192986b1a731..ac862422df158 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) if (!malloc_ptr) malloc_ptr = free_mem_ptr; - malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ p = (void *)malloc_ptr; malloc_ptr += size; -- GitLab From 04dd4403ff3721ad0bff925116fada773ed6ae69 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Nov 2023 17:34:07 -0500 Subject: [PATCH 0611/2290] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml commit 5b750b22530fe53bf7fd6a30baacd53ada26911b upstream. Does the same thing as: commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") Reviewed-by: Harry Wentland Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/ Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40") Signed-off-by: Alex Deucher Cc: Alvin Lee Cc: Hamza Mahfooz Cc: Samson Tam Cc: Harry Wentland Cc: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 6fdf87a6e240f..6c7b286e1123d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -51,8 +51,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) -- GitLab From 831e9e63cc3b90f62d82df854cda8232408526a9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Feb 2024 10:25:49 +1100 Subject: [PATCH 0612/2290] NFS: Fix data corruption caused by congestion. when AOP_WRITEPAGE_ACTIVATE is returned (as NFS does when it detects congestion) it is important that the page is redirtied. nfs_writepage_locked() doesn't do this, so files can become corrupted as writes can be lost. Note that this is not needed in v6.8 as AOP_WRITEPAGE_ACTIVATE cannot be returned. It is needed for kernels v5.18..v6.7. From 6.3 onward the patch is different as it needs to mention "folio", not "page". Reported-and-tested-by: Jacek Tomaka Fixes: 6df25e58532b ("nfs: remove reliance on bdi congestion") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f41d24b54fd1f..6a06066684172 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page, int err; if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) + NFS_SERVER(inode)->write_congested) { + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, 0, -- GitLab From d03a9855cbe6f41b2928c4df2e33e05f32a8e7fa Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 13 Sep 2022 14:01:51 -0400 Subject: [PATCH 0613/2290] NFSD: Simplify READ_PLUS [ Upstream commit eeadcb75794516839078c28b3730132aeb700ce6 ] Chuck had suggested reverting READ_PLUS so it returns a single DATA segment covering the requested read range. This prepares the server for a future "sparse read" function so support can easily be added without needing to rip out the old READ_PLUS code at the same time. Signed-off-by: Anna Schumaker Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 139 +++++++++++----------------------------------- 1 file changed, 32 insertions(+), 107 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 89a579be042e5..51a598ee68fe1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4777,79 +4777,37 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof, - loff_t *pos) + struct nfsd4_read *read) { - struct xdr_stream *xdr = resp->xdr; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); struct file *file = read->rd_nf->nf_file; - int starting_len = xdr->buf->len; - loff_t hole_pos; - __be32 nfserr; - __be32 *p, tmp; - __be64 tmp64; - - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); - if (hole_pos > read->rd_offset) - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); + struct xdr_stream *xdr = resp->xdr; + unsigned long maxcount; + __be32 nfserr, *p; /* Content type, offset, byte count */ p = xdr_reserve_space(xdr, 4 + 8 + 4); if (!p) - return nfserr_resource; + return nfserr_io; + if (resp->xdr->buf->page_len && splice_ok) { + WARN_ON_ONCE(splice_ok); + return nfserr_serverfault; + } - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); - if (read->rd_vlen < 0) - return nfserr_resource; + maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); + if (file->f_op->splice_read && splice_ok) + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); - - tmp = htonl(NFS4_CONTENT_DATA); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp64 = cpu_to_be64(read->rd_offset); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); - tmp = htonl(*maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); - - tmp = xdr_zero; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, - xdr_pad_size(*maxcount)); - return nfs_ok; -} - -static __be32 -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof) -{ - struct file *file = read->rd_nf->nf_file; - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); - loff_t f_size = i_size_read(file_inode(file)); - unsigned long count; - __be32 *p; - - if (data_pos == -ENXIO) - data_pos = f_size; - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); - count = data_pos - read->rd_offset; - /* Content type, offset, byte count */ - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); - if (!p) - return nfserr_resource; - - *p++ = htonl(NFS4_CONTENT_HOLE); + *p++ = cpu_to_be32(NFS4_CONTENT_DATA); p = xdr_encode_hyper(p, read->rd_offset); - p = xdr_encode_hyper(p, count); + *p = cpu_to_be32(read->rd_length); - *eof = (read->rd_offset + count) >= f_size; - *maxcount = min_t(unsigned long, count, *maxcount); return nfs_ok; } @@ -4857,69 +4815,36 @@ static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { - unsigned long maxcount, count; + struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - struct file *file; int starting_len = xdr->buf->len; - int last_segment = xdr->buf->len; - int segments = 0; - __be32 *p, tmp; - bool is_data; - loff_t pos; - u32 eof; + u32 segments = 0; + __be32 *p; if (nfserr) return nfserr; - file = read->rd_nf->nf_file; /* eof flag, segment count */ p = xdr_reserve_space(xdr, 4 + 4); if (!p) - return nfserr_resource; + return nfserr_io; xdr_commit_encode(xdr); - maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); - count = maxcount; - - eof = read->rd_offset >= i_size_read(file_inode(file)); - if (eof) + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); + if (read->rd_eof) goto out; - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); - is_data = pos > read->rd_offset; - - while (count > 0 && !eof) { - maxcount = count; - if (is_data) - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, - segments == 0 ? &pos : NULL); - else - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); - if (nfserr) - goto out; - count -= maxcount; - read->rd_offset += maxcount; - is_data = !is_data; - last_segment = xdr->buf->len; - segments++; - } - -out: - if (nfserr && segments == 0) + nfserr = nfsd4_encode_read_plus_data(resp, read); + if (nfserr) { xdr_truncate_encode(xdr, starting_len); - else { - if (nfserr) { - xdr_truncate_encode(xdr, last_segment); - nfserr = nfs_ok; - eof = 0; - } - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + return nfserr; } + segments++; + +out: + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(segments); return nfserr; } -- GitLab From 0a49efb94888b6381d9c43fda17115ffda40a039 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Oct 2022 21:24:23 +0100 Subject: [PATCH 0614/2290] NFSD: Remove redundant assignment to variable host_err [ Upstream commit 69eed23baf877bbb1f14d7f4df54f89807c9ee2a ] Variable host_err is assigned a value that is never read, it is being re-assigned a value in every different execution path in the following switch statement. The assignment is redundant and can be removed. Cleans up clang-scan warning: warning: Value stored to 'host_err' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eccc6ce55a63a..fce7a35a5e64d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1317,7 +1317,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); err = 0; - host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); -- GitLab From bfef0cfab41cb4894bc5cf8b93e76327ac04b9b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Oct 2022 07:47:54 -0400 Subject: [PATCH 0615/2290] nfsd: ignore requests to disable unsupported versions [ Upstream commit 8e823bafff2308753d430566256c83d8085952da ] The kernel currently errors out if you attempt to enable or disable a version that it doesn't recognize. Change it to ignore attempts to disable an unrecognized version. If we don't support it, then there is no harm in doing so. Signed-off-by: Jeff Layton Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 573de0d49e172..c21f5815d7264 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -601,7 +601,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } break; default: - return -EINVAL; + /* Ignore requests to disable non-existent versions */ + if (cmd == NFSD_SET) + return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); -- GitLab From 850333a25aab582118d9fa405af00caae32faa62 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Oct 2022 07:47:55 -0400 Subject: [PATCH 0616/2290] nfsd: move nfserrno() to vfs.c [ Upstream commit cb12fae1c34b1fa7eaae92c5aadc72d86d7fae19 ] nfserrno() is common to all nfs versions, but nfsproc.c is specifically for NFSv2. Move it to vfs.c, and the prototype to vfs.h. While we're in here, remove the #ifdef EDQUOT check in this function. It's apparently a holdover from the initial merge of the nfsd code in 1997. No other place in the kernel checks that that symbol is defined before using it, so I think we can dispense with it here. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/blocklayout.c | 1 + fs/nfsd/blocklayoutxdr.c | 1 + fs/nfsd/export.h | 1 - fs/nfsd/flexfilelayout.c | 1 + fs/nfsd/nfs4idmap.c | 1 + fs/nfsd/nfsproc.c | 62 --------------------------------------- fs/nfsd/vfs.c | 63 ++++++++++++++++++++++++++++++++++++++++ fs/nfsd/vfs.h | 1 + 8 files changed, 68 insertions(+), 63 deletions(-) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e7e6e78d965db..01d7fd108cf3d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -12,6 +12,7 @@ #include "blocklayoutxdr.h" #include "pnfs.h" #include "filecache.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 2455dc8be18a8..1ed2f691ebb90 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -9,6 +9,7 @@ #include "nfsd.h" #include "blocklayoutxdr.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index ee0e3aba4a6e5..d03f7f6a8642d 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); static inline void exp_put(struct svc_export *exp) { diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index 070f90ed09b61..3ca5304440ff0 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -15,6 +15,7 @@ #include "flexfilelayoutxdr.h" #include "pnfs.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e70a1a2999b7b..5e9809aff37eb 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,6 +41,7 @@ #include "idmap.h" #include "nfsd.h" #include "netns.h" +#include "vfs.h" /* * Turn off idmapping when using AUTH_SYS. diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 82b3ddeacc338..52fc222c34f26 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = { .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS2_SVC_XDRSIZE, }; - -/* - * Map errnos to NFS errnos. - */ -__be32 -nfserrno (int errno) -{ - static struct { - __be32 nfserr; - int syserr; - } nfs_errtbl[] = { - { nfs_ok, 0 }, - { nfserr_perm, -EPERM }, - { nfserr_noent, -ENOENT }, - { nfserr_io, -EIO }, - { nfserr_nxio, -ENXIO }, - { nfserr_fbig, -E2BIG }, - { nfserr_stale, -EBADF }, - { nfserr_acces, -EACCES }, - { nfserr_exist, -EEXIST }, - { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, - { nfserr_nodev, -ENODEV }, - { nfserr_notdir, -ENOTDIR }, - { nfserr_isdir, -EISDIR }, - { nfserr_inval, -EINVAL }, - { nfserr_fbig, -EFBIG }, - { nfserr_nospc, -ENOSPC }, - { nfserr_rofs, -EROFS }, - { nfserr_mlink, -EMLINK }, - { nfserr_nametoolong, -ENAMETOOLONG }, - { nfserr_notempty, -ENOTEMPTY }, -#ifdef EDQUOT - { nfserr_dquot, -EDQUOT }, -#endif - { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_jukebox, -EAGAIN }, - { nfserr_jukebox, -EWOULDBLOCK }, - { nfserr_jukebox, -ENOMEM }, - { nfserr_io, -ETXTBSY }, - { nfserr_notsupp, -EOPNOTSUPP }, - { nfserr_toosmall, -ETOOSMALL }, - { nfserr_serverfault, -ESERVERFAULT }, - { nfserr_serverfault, -ENFILE }, - { nfserr_io, -EREMOTEIO }, - { nfserr_stale, -EOPENSTALE }, - { nfserr_io, -EUCLEAN }, - { nfserr_perm, -ENOKEY }, - { nfserr_no_grace, -ENOGRACE}, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { - if (nfs_errtbl[i].syserr == errno) - return nfs_errtbl[i].nfserr; - } - WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); - return nfserr_io; -} - diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fce7a35a5e64d..5d6a61d47a905 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -49,6 +49,69 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +/** + * nfserrno - Map Linux errnos to NFS errnos + * @errno: POSIX(-ish) error code to be mapped + * + * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If + * it's an error we don't expect, log it once and return nfserr_io. + */ +__be32 +nfserrno (int errno) +{ + static struct { + __be32 nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, + { nfserr_dquot, -EDQUOT }, + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, + { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, + { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -ENOGRACE}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9744b041105b5..dbdfef7ae85bb 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) posix_acl_release(attrs->na_dpacl); } +__be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, -- GitLab From f82865e2a026b6d491377e64ad18326a413e6421 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Oct 2022 07:47:56 -0400 Subject: [PATCH 0617/2290] nfsd: allow disabling NFSv2 at compile time [ Upstream commit 2f3a4b2ac2f28b9be78ad21f401f31e263845214 ] rpc.nfsd stopped supporting NFSv2 a year ago. Take the next logical step toward deprecating it and allow NFSv2 support to be compiled out. Add a new CONFIG_NFSD_V2 option that can be turned off and rework the CONFIG_NFSD_V?_ACL option dependencies. Add a description that discourages enabling it. Also, change the description of CONFIG_NFSD to state that the always-on version is now 3 instead of 2. Finally, add an #ifdef around "case 2:" in __write_versions. When NFSv2 is disabled at compile time, this should make the kernel ignore attempts to disable it at runtime, but still error out when trying to enable it. Signed-off-by: Jeff Layton Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/Kconfig | 19 +++++++++++++++---- fs/nfsd/Makefile | 5 +++-- fs/nfsd/nfsctl.c | 2 ++ fs/nfsd/nfsd.h | 3 +-- fs/nfsd/nfssvc.c | 6 ++++++ 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f6a2fd3015e75..7c441f2bd4440 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -8,6 +8,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER help Choose Y here if you want to allow other computers to access @@ -26,19 +27,29 @@ config NFSD Below you can choose which versions of the NFS protocol are available to clients mounting the NFS server on this system. - Support for NFS version 2 (RFC 1094) is always available when + Support for NFS version 3 (RFC 1813) is always available when CONFIG_NFSD is selected. If unsure, say N. -config NFSD_V2_ACL - bool +config NFSD_V2 + bool "NFS server support for NFS version 2 (DEPRECATED)" depends on NFSD + default n + help + NFSv2 (RFC 1094) was the first publicly-released version of NFS. + Unless you are hosting ancient (1990's era) NFS clients, you don't + need this. + + If unsure, say N. + +config NFSD_V2_ACL + bool "NFS server support for the NFSv2 ACL protocol extension" + depends on NFSD_V2 config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD - select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 805c06d5f1b4b..6fffc8f03f740 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o # this one should be compiled first, as the tracing macros can easily blow up nfsd-y += trace.o -nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ - export.o auth.o lockd.o nfscache.o nfsxdr.o \ +nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o \ stats.o filecache.o nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c21f5815d7264..a8884d0b4638c 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { +#ifdef CONFIG_NFSD_V2 case 2: +#endif case 3: nfsd_vers(nn, num, cmd); break; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 09726c5b9a317..93b42ef9ed91b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -64,8 +64,7 @@ struct readdir_cd { extern struct svc_program nfsd_program; -extern const struct svc_version nfsd_version2, nfsd_version3, - nfsd_version4; +extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7695ebd28dc3..6f4a38f5ab0ce 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { +# if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, +# endif +# if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, +# endif }; #define NFSD_ACL_MINVERS 2 @@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = { #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { +#if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, +#endif [3] = &nfsd_version3, #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, -- GitLab From 137d20da8ea0daa9e0a2787acc4b66261e8796df Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 21 Oct 2022 14:24:14 +0200 Subject: [PATCH 0618/2290] exportfs: use pr_debug for unreachable debug statements [ Upstream commit 427505ffeaa464f683faba945a88d3e3248f6979 ] expfs.c has a bunch of dprintk statements which are unusable due to: #define dprintk(fmt, args...) do{}while(0) Use pr_debug so that they can be enabled dynamically. Also make some minor changes to the debug statements to fix some incorrect types, and remove __func__ which can be handled by dynamic debug separately. Signed-off-by: David Disseldorp Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/exportfs/expfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c648a493faf23..3204bd33e4e8a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -18,7 +18,7 @@ #include #include -#define dprintk(fmt, args...) do{}while(0) +#define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + dprintk("get_parent of %lu failed, err %ld\n", + dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } @@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { - dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } -- GitLab From e62d8c1281662a0cff23df2948162c1fe705d613 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Nov 2022 13:30:46 -0400 Subject: [PATCH 0619/2290] NFSD: Flesh out a documenting comment for filecache.c [ Upstream commit b3276c1f5b268ff56622e9e125b792b4c3dc03ac ] Record what we've learned recently about the NFSD filecache in a documenting comment so our future selves don't forget what all this is for. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/filecache.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 5cb8cce153a57..24ed511ed0d38 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -2,6 +2,30 @@ * Open file cache. * * (c) 2015 - Jeff Layton + * + * An nfsd_file object is a per-file collection of open state that binds + * together: + * - a struct file * + * - a user credential + * - a network namespace + * - a read-ahead context + * - monitoring for writeback errors + * + * nfsd_file objects are reference-counted. Consumers acquire a new + * object via the nfsd_file_acquire API. They manage their interest in + * the acquired object, and hence the object's reference count, via + * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file + * object: + * + * * non-garbage-collected: When a consumer wants to precisely control + * the lifetime of a file's open state, it acquires a non-garbage- + * collected nfsd_file. The final nfsd_file_put releases the open + * state immediately. + * + * * garbage-collected: When a consumer does not control the lifetime + * of open state, it acquires a garbage-collected nfsd_file. The + * final nfsd_file_put allows the open state to linger for a period + * during which it may be re-used. */ #include -- GitLab From 519a80ea5a1770f1bb7d0627f4670ca1c1767f80 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:57 -0400 Subject: [PATCH 0620/2290] NFSD: Clean up nfs4_preprocess_stateid_op() call sites [ Upstream commit eeff73f7c1c583f79a401284f46c619294859310 ] Remove the lame-duck dprintk()s around nfs4_preprocess_stateid_op() call sites. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a9105e95b59c5..ba53cd89ec62c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, &read->rd_nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); - goto out; - } - status = nfs_ok; -out: + read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, WR_STATE, NULL, NULL); - if (status) { - dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + if (status) return status; - } } err = fh_want_write(&cstate->current_fh); if (err) @@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + if (status) return status; - } write->wr_how_written = write->wr_stable_how; @@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); + if (status) goto out; - } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); + if (status) goto out_put_src; - } /* fix up for NFS-specific error code */ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) || @@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &nf, NULL); - if (status != nfs_ok) { - dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); + if (status != nfs_ok) return status; - } status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, fallocate->falloc_offset, @@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + if (status) return status; - } switch (seek->seek_whence) { case NFS4_CONTENT_DATA: -- GitLab From 9fbef7dcd8aa552d5a7e6867eec570e89d7d1631 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:03 -0400 Subject: [PATCH 0621/2290] NFSD: Trace stateids returned via DELEGRETURN [ Upstream commit 20eee313ff4b8a7e71ae9560f5c4ba27cd763005 ] Handing out a delegation stateid is recorded with the nfsd_deleg_read tracepoint, but there isn't a matching tracepoint for recording when the stateid is returned. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 1 + fs/nfsd/trace.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b3f6dda930d8b..6f974fdb47de2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6935,6 +6935,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + trace_nfsd_deleg_return(stateid); wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 132335011ccae..fedf676ef446f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -604,6 +604,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_return); DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, -- GitLab From fae3f8b554fae8631a954e2b205ad84c531ba71b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:09 -0400 Subject: [PATCH 0622/2290] NFSD: Trace delegation revocations [ Upstream commit a1c74569bbde91299f24535abf711be5c84df9de ] Delegation revocation is an exceptional event that is not otherwise visible externally (eg, no network traffic is emitted). Generate a trace record when it occurs so that revocation can be observed or other activity can be triggered. Example: nfsd-1104 [005] 1912.002544: nfsd_stid_revoke: client 633c9343:4e82788d stateid 00000003:00000001 ref=2 type=DELEG Trace infrastructure is provided for subsequent additional tracing related to nfs4_stid activity. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 ++ fs/nfsd/trace.h | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6f974fdb47de2..5d55812f5a2ae 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1367,6 +1367,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + trace_nfsd_stid_revoke(&dp->dl_stid); + if (clp->cl_minorversion) { spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index fedf676ef446f..d261a06b61408 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -637,6 +637,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); +TRACE_DEFINE_ENUM(NFS4_OPEN_STID); +TRACE_DEFINE_ENUM(NFS4_LOCK_STID); +TRACE_DEFINE_ENUM(NFS4_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); +TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); + +#define show_stid_type(x) \ + __print_flags(x, "|", \ + { NFS4_OPEN_STID, "OPEN" }, \ + { NFS4_LOCK_STID, "LOCK" }, \ + { NFS4_DELEG_STID, "DELEG" }, \ + { NFS4_CLOSED_STID, "CLOSED" }, \ + { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ + { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ + { NFS4_LAYOUT_STID, "LAYOUT" }) + +DECLARE_EVENT_CLASS(nfsd_stid_class, + TP_PROTO( + const struct nfs4_stid *stid + ), + TP_ARGS(stid), + TP_STRUCT__entry( + __field(unsigned long, sc_type) + __field(int, sc_count) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + const stateid_t *stp = &stid->sc_stateid; + + __entry->sc_type = stid->sc_type; + __entry->sc_count = refcount_read(&stid->sc_count); + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", + __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation, + __entry->sc_count, show_stid_type(__entry->sc_type) + ) +); + +#define DEFINE_STID_EVENT(name) \ +DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ + TP_PROTO(const struct nfs4_stid *stid), \ + TP_ARGS(stid)) + +DEFINE_STID_EVENT(revoke); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), -- GitLab From 255ac53d78d562fe27b486360699dcbeb0bfacf8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:16 -0400 Subject: [PATCH 0623/2290] NFSD: Use const pointers as parameters to fh_ helpers [ Upstream commit b48f8056c034f28dd54668399f1d22be421b0bef ] Enable callers to use const pointers where they are able to. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsfh.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index c3ae6414fc5cf..513e028b0bbee 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, struct svc_fh *src) +fh_copy(struct svc_fh *dst, const struct svc_fh *src) { WARN_ON(src->fh_dentry); @@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src) } static inline void -fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) { dst->fh_size = src->fh_size; memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); @@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize) return fhp; } -static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; @@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) return true; } -static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_fsid_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; -- GitLab From 6ee5c4e269a9136da48df4126c4dde9b899d35cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:22 -0400 Subject: [PATCH 0624/2290] NFSD: Update file_hashtbl() helpers [ Upstream commit 3fe828caddd81e68e9d29353c6e9285a658ca056 ] Enable callers to use const pointers for type safety. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5d55812f5a2ae..b6e7db0f6a694 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -721,7 +721,7 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static unsigned int file_hashval(struct svc_fh *fh) +static unsigned int file_hashval(const struct svc_fh *fh) { struct inode *inode = d_inode(fh->fh_dentry); @@ -4687,7 +4687,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) /* search file_hashtbl[] for file */ static struct nfs4_file * -find_file_locked(struct svc_fh *fh, unsigned int hashval) +find_file_locked(const struct svc_fh *fh, unsigned int hashval) { struct nfs4_file *fp; -- GitLab From c8d8876aae34f2609d3ea815106024645fa85112 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:28 -0400 Subject: [PATCH 0625/2290] NFSD: Clean up nfsd4_init_file() [ Upstream commit 81a21fa3e7fdecb3c5b97014f0fc5a17d5806cae ] Name this function more consistently. I'm going to use nfsd4_file_ and nfsd4_file_hash_ for these helpers. Change the @fh parameter to be const pointer for better type safety. Finally, move the hash insertion operation to the caller. This is typical for most other "init_object" type helpers, and it is where most of the other nfs4_file hash table operations are located. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b6e7db0f6a694..80e11da952829 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4278,11 +4278,9 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, - struct nfs4_file *fp) -{ - lockdep_assert_held(&state_lock); +static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) +{ refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); @@ -4300,7 +4298,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif - hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } void @@ -4718,7 +4715,8 @@ static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, fp->fi_aliased = alias_found = true; } if (likely(ret == NULL)) { - nfsd4_init_file(fh, hashval, new); + nfsd4_file_init(fh, new); + hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); new->fi_aliased = alias_found; ret = new; } -- GitLab From 5aa0c564c017a008b3d971a6228cfae171695f57 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:34 -0400 Subject: [PATCH 0626/2290] NFSD: Add a nfsd4_file_hash_remove() helper [ Upstream commit 3341678f2fd6106055cead09e513fad6950a0d19 ] Refactor to relocate hash deletion operation to a helper function that is close to most other nfs4_file data structure operations. The "noinline" annotation will become useful in a moment when the hlist_del_rcu() is replaced with a more complex rhash remove operation. It also guarantees that hash remove operations can be traced with "-p function -l remove_nfs4_file_locked". This also simplifies the organization of forward declarations: the to-be-added rhashtable and its param structure will be defined /after/ put_nfs4_file(). Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 80e11da952829..8e26edbe54a33 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -84,6 +84,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +static void nfsd4_file_hash_remove(struct nfs4_file *fi); /* Locking: */ @@ -591,7 +592,7 @@ put_nfs4_file(struct nfs4_file *fi) might_lock(&state_lock); if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { - hlist_del_rcu(&fi->fi_hash); + nfsd4_file_hash_remove(fi); spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); @@ -4750,6 +4751,11 @@ find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) return insert_file(new, fh, hashval); } +static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) +{ + hlist_del_rcu(&fi->fi_hash); +} + /* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid -- GitLab From 0d4150f5eb20b2f14153474af7ca3a26814850a8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:41 -0400 Subject: [PATCH 0627/2290] NFSD: Clean up find_or_add_file() [ Upstream commit 9270fc514ba7d415636b23bcb937573a1ce54f6a ] Remove the call to find_file_locked() in insert_nfs4_file(). Tracing shows that over 99% of these calls return NULL. Thus it is not worth the expense of the extra bucket list traversal. insert_file() already deals correctly with the case where the item is already in the hash bucket. Since nfsd4_file_hash_insert() is now just a wrapper around insert_file(), move the meat of insert_file() into nfsd4_file_hash_insert() and get rid of it. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 64 ++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8e26edbe54a33..da6a7574ac558 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4699,24 +4699,42 @@ find_file_locked(const struct svc_fh *fh, unsigned int hashval) return NULL; } -static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, - unsigned int hashval) +static struct nfs4_file * find_file(struct svc_fh *fh) { struct nfs4_file *fp; + unsigned int hashval = file_hashval(fh); + + rcu_read_lock(); + fp = find_file_locked(fh, hashval); + rcu_read_unlock(); + return fp; +} + +/* + * On hash insertion, identify entries with the same inode but + * distinct filehandles. They will all be in the same hash bucket + * because nfs4_file's are hashed by the address in the fi_inode + * field. + */ +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) +{ + unsigned int hashval = file_hashval(fhp); struct nfs4_file *ret = NULL; bool alias_found = false; + struct nfs4_file *fi; spin_lock(&state_lock); - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, + hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - ret = fp; - } else if (d_inode(fh->fh_dentry) == fp->fi_inode) - fp->fi_aliased = alias_found = true; + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) + ret = fi; + } else if (d_inode(fhp->fh_dentry) == fi->fi_inode) + fi->fi_aliased = alias_found = true; } if (likely(ret == NULL)) { - nfsd4_file_init(fh, new); + nfsd4_file_init(fhp, new); hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); new->fi_aliased = alias_found; ret = new; @@ -4725,32 +4743,6 @@ static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, return ret; } -static struct nfs4_file * find_file(struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - return fp; -} - -static struct nfs4_file * -find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - if (fp) - return fp; - - return insert_file(new, fh, hashval); -} - static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { hlist_del_rcu(&fi->fi_hash); @@ -5661,7 +5653,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(open->op_file, current_fh); + fp = nfsd4_file_hash_insert(open->op_file, current_fh); if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) -- GitLab From 49e8d9f465006ba7197cbcc6d297528b72a2f196 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:47 -0400 Subject: [PATCH 0628/2290] NFSD: Refactor find_file() [ Upstream commit 15424748001a9b5ea62b3e6ad45f0a8b27f01df9 ] find_file() is now the only caller of find_file_locked(), so just fold these two together. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index da6a7574ac558..78b8f1442e005 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4683,31 +4683,24 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) nfs4_put_stid(&last->st_stid); } -/* search file_hashtbl[] for file */ -static struct nfs4_file * -find_file_locked(const struct svc_fh *fh, unsigned int hashval) +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - struct nfs4_file *fp; + unsigned int hashval = file_hashval(fhp); + struct nfs4_file *fi; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - return fp; + rcu_read_lock(); + hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, + lockdep_is_held(&state_lock)) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) { + rcu_read_unlock(); + return fi; + } } } - return NULL; -} - -static struct nfs4_file * find_file(struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); rcu_read_unlock(); - return fp; + return NULL; } /* @@ -4758,9 +4751,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(current_fh); + fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret; + /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) -- GitLab From 5a1f61516f802d95959944e7529d23dfa6868031 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:47:53 -0400 Subject: [PATCH 0629/2290] NFSD: Use rhashtable for managing nfs4_file objects [ Upstream commit d47b295e8d76a4d69f0e2ea0cd8a79c9d3488280 ] fh_match() is costly, especially when filehandles are large (as is the case for NFSv4). It needs to be used sparingly when searching data structures. Unfortunately, with common workloads, I see multiple thousands of objects stored in file_hashtbl[], which has just 256 buckets, making its bucket hash chains quite lengthy. Walking long hash chains with the state_lock held blocks other activity that needs that lock. Sizable hash chains are a common occurrance once the server has handed out some delegations, for example -- IIUC, each delegated file is held open on the server by an nfs4_file object. To help mitigate the cost of searching with fh_match(), replace the nfs4_file hash table with an rhashtable, which can dynamically resize its bucket array to minimize hash chain length. The result of this modification is an improvement in the latency of NFSv4 operations, and the reduction of nfsd CPU utilization due to eliminating the cost of multiple calls to fh_match() and reducing the CPU cache misses incurred while walking long hash chains in the nfs4_file hash table. Signed-off-by: Chuck Lever Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 97 +++++++++++++++++++++++++++++---------------- fs/nfsd/state.h | 5 +-- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 78b8f1442e005..64a61c3d8c609 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,7 +44,9 @@ #include #include #include +#include #include + #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -589,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) void put_nfs4_file(struct nfs4_file *fi) { - might_lock(&state_lock); - - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { + if (refcount_dec_and_test(&fi->fi_ref)) { nfsd4_file_hash_remove(fi); - spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); @@ -718,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -/* hash table for nfs4_file */ -#define FILE_HASH_BITS 8 -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) - -static unsigned int file_hashval(const struct svc_fh *fh) -{ - struct inode *inode = d_inode(fh->fh_dentry); +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; - /* XXX: why not (here & in file cache) use inode? */ - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); -} +static const struct rhashtable_params nfs4_file_rhash_params = { + .key_len = sizeof_field(struct nfs4_file, fi_inode), + .key_offset = offsetof(struct nfs4_file, fi_inode), + .head_offset = offsetof(struct nfs4_file, fi_rlist), -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, + .automatic_shrinking = true, +}; /* * Check if courtesy clients have conflicting access and resolve it if possible @@ -4686,12 +4686,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) static noinline_for_stack struct nfs4_file * nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - unsigned int hashval = file_hashval(fhp); + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *fi; rcu_read_lock(); - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) { rcu_read_unlock(); @@ -4705,40 +4707,56 @@ nfsd4_file_hash_lookup(const struct svc_fh *fhp) /* * On hash insertion, identify entries with the same inode but - * distinct filehandles. They will all be in the same hash bucket - * because nfs4_file's are hashed by the address in the fi_inode - * field. + * distinct filehandles. They will all be on the list returned + * by rhltable_lookup(). + * + * inode->i_lock prevents racing insertions from adding an entry + * for the same inode/fhp pair twice. */ static noinline_for_stack struct nfs4_file * nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) { - unsigned int hashval = file_hashval(fhp); + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; struct nfs4_file *fi; + int err; - spin_lock(&state_lock); - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { + rcu_read_lock(); + spin_lock(&inode->i_lock); + + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) ret = fi; - } else if (d_inode(fhp->fh_dentry) == fi->fi_inode) + } else fi->fi_aliased = alias_found = true; } - if (likely(ret == NULL)) { - nfsd4_file_init(fhp, new); - hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); - new->fi_aliased = alias_found; - ret = new; - } - spin_unlock(&state_lock); + if (ret) + goto out_unlock; + + nfsd4_file_init(fhp, new); + err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, + nfs4_file_rhash_params); + if (err) + goto out_unlock; + + new->fi_aliased = alias_found; + ret = new; + +out_unlock: + spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ret; } static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { - hlist_del_rcu(&fi->fi_hash); + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, + nfs4_file_rhash_params); } /* @@ -5648,6 +5666,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * If not found, create the nfs4_file struct */ fp = nfsd4_file_hash_insert(open->op_file, current_fh); + if (unlikely(!fp)) + return nfserr_jukebox; if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -8064,10 +8084,16 @@ nfs4_state_start(void) { int ret; - ret = nfsd4_create_callback_queue(); + ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) return ret; + ret = nfsd4_create_callback_queue(); + if (ret) { + rhltable_destroy(&nfs4_file_rhltable); + return ret; + } + set_max_delegations(); return 0; } @@ -8098,6 +8124,7 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); + rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e2daef3cc0034..eadd7f465bf52 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -536,16 +536,13 @@ struct nfs4_clnt_odstate { * inode can have multiple filehandles associated with it, so there is * (potentially) a many to one relationship between this struct and struct * inode. - * - * These are hashed by filehandle in the file_hashtbl, which is protected by - * the global state_lock spinlock. */ struct nfs4_file { refcount_t fi_ref; struct inode * fi_inode; bool fi_aliased; spinlock_t fi_lock; - struct hlist_node fi_hash; /* hash on fi_fhandle */ + struct rhlist_head fi_rlist; struct list_head fi_stateids; union { struct list_head fi_delegations; -- GitLab From 6b12589f610ae5ca924573315b4cf3afd593cb09 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 31 Oct 2022 09:53:26 -0400 Subject: [PATCH 0630/2290] NFSD: Fix licensing header in filecache.c [ Upstream commit 3f054211b29c0fa06dfdcab402c795fd7e906be1 ] Add a missing SPDX header. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/filecache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 24ed511ed0d38..e37ecd8b8197e 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Open file cache. + * The NFSD open file cache. * * (c) 2015 - Jeff Layton * -- GitLab From 1f76cb66ff2257675666172aba42b4e661809a20 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Nov 2022 09:02:30 -0500 Subject: [PATCH 0631/2290] filelock: add a new locks_inode_context accessor function [ Upstream commit 401a8b8fd5acd51582b15238d72a8d0edd580e9f ] There are a number of places in the kernel that are accessing the inode->i_flctx field without smp_load_acquire. This is required to ensure that the caller doesn't see a partially-initialized structure. Add a new accessor function for it to make this clear and convert all of the relevant accesses in locks.c to use it. Also, convert locks_free_lock_context to use the helper as well instead of just doing a "bare" assignment. Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Stable-dep-of: 77c67530e1f9 ("nfsd: use locks_inode_context helper") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 24 ++++++++++++------------ include/linux/fs.h | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 1047ab2b15e96..7d0918b8fe5d6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) void *owner; void (*func)(void); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; @@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); + ctx = locks_inode_context(locks_inode(filp)); if (!ctx) return; @@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode) struct file_lock_context *ctx; bool ret; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return false; @@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return; diff --git a/include/linux/fs.h b/include/linux/fs.h index 67313881f8ac1..092d8fa10153f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1189,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); extern bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1334,6 +1341,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, { return false; } + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) -- GitLab From c66f9f22e6e555edd575d471c6a309466651a2f5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Nov 2022 09:19:43 -0500 Subject: [PATCH 0632/2290] lockd: use locks_inode_context helper [ Upstream commit 98b41ffe0afdfeaa1439a5d6bd2db4a94277e31b ] lockd currently doesn't access i_flctx safely. This requires a smp_load_acquire, as the pointer is set via cmpxchg (a release operation). Cc: Trond Myklebust Cc: Anna Schumaker Cc: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svcsubs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 3515f17eaf3fb..e3b6229e7ae5c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; -- GitLab From e017486dadf9ebb890bdd654b67014a9aeaa41c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Nov 2022 09:36:07 -0500 Subject: [PATCH 0633/2290] nfsd: use locks_inode_context helper [ Upstream commit 77c67530e1f95ac25c7075635f32f04367380894 ] nfsd currently doesn't access i_flctx safely everywhere. This requires a smp_load_acquire, as the pointer is set via cmpxchg (a release operation). Acked-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 64a61c3d8c609..5d6851fd4f972 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4784,7 +4784,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) static bool nfsd4_deleg_present(const struct inode *inode) { - struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + struct file_lock_context *ctx = locks_inode_context(inode); return ctx && !list_empty_careful(&ctx->flc_lease); } @@ -5944,7 +5944,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { nf = stp->st_stid.sc_file; - ctx = nf->fi_inode->i_flctx; + ctx = locks_inode_context(nf->fi_inode); if (!ctx) continue; if (locks_owner_has_blockers(ctx, lo)) @@ -7761,7 +7761,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } inode = locks_inode(nf->nf_file); - flctx = inode->i_flctx; + flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); -- GitLab From 8b7be6ef588e0df6036e99f0f637fdac641da396 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Nov 2022 14:44:50 -0400 Subject: [PATCH 0634/2290] nfsd: fix up the filecache laundrette scheduling [ Upstream commit 22ae4c114f77b55a4c5036e8f70409a0799a08f8 ] We don't really care whether there are hashed entries when it comes to scheduling the laundrette. They might all be non-gc entries, after all. We only want to schedule it if there are entries on the LRU. Switch to using list_lru_count, and move the check into nfsd_file_gc_worker. The other callsite in nfsd_file_put doesn't need to count entries, since it only schedules the laundrette after adding an entry to the LRU. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/filecache.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e37ecd8b8197e..697acf5c3c681 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -211,12 +211,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = { static void nfsd_file_schedule_laundrette(void) { - if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || - test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) - return; - - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, - NFSD_LAUNDRETTE_DELAY); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -614,7 +611,8 @@ static void nfsd_file_gc_worker(struct work_struct *work) { nfsd_file_gc(); - nfsd_file_schedule_laundrette(); + if (list_lru_count(&nfsd_file_lru)) + nfsd_file_schedule_laundrette(); } static unsigned long -- GitLab From 12e63680a76cad3bf505669b753560582ccadfcb Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 11 Nov 2022 17:18:35 +0800 Subject: [PATCH 0635/2290] NFSD: Use struct_size() helper in alloc_session() [ Upstream commit 85a0d0c9a58002ef7d1bf5e3ea630f4fbd42a4f0 ] Use struct_size() helper to simplify the code, no functional changes. Signed-off-by: Xiu Jianfeng Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5d6851fd4f972..d2ed047dfdc89 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1834,13 +1834,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; - int mem, i; + int i; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) - + sizeof(struct nfsd4_session) > PAGE_SIZE); - mem = numslots * sizeof(struct nfsd4_slot *); + BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) + > PAGE_SIZE); - new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); if (!new) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ -- GitLab From 8973a8f9b72dbafbd1083c220d975a0e7ec871d0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 14:36:36 -0500 Subject: [PATCH 0636/2290] lockd: set missing fl_flags field when retrieving args [ Upstream commit 75c7940d2a86d3f1b60a0a265478cb8fc887b970 ] Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svc4proc.c | 1 + fs/lockd/svcproc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 284b019cb6529..b72023a6b4c16 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, *filp = file; /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_start = (loff_t)lock->lock_start; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index e35c05e278061..32784f508c810 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ mode = lock_to_openmode(&lock->fl); + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; -- GitLab From ccbf6efab8d37e3af007e83d7e7797f0ab2f3064 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 14:36:37 -0500 Subject: [PATCH 0637/2290] lockd: ensure we use the correct file descriptor when unlocking [ Upstream commit 69efce009f7df888e1fede3cb2913690eb829f52 ] Shared locks are set on O_RDONLY descriptors and exclusive locks are set on O_WRONLY ones. nlmsvc_unlock however calls vfs_lock_file twice, once for each descriptor, but it doesn't reset fl_file. Ensure that it does. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9c1aa75441e1c..9eae99e08e699 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; - if (file->f_file[O_RDONLY]) - error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_RDONLY]; + if (lock->fl.fl_file) + error = vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); - if (file->f_file[O_WRONLY]) - error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_WRONLY]; + if (lock->fl.fl_file) + error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; -- GitLab From 0920deeec6dd2e8d142a688a81744702895d46c6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Nov 2022 14:36:38 -0500 Subject: [PATCH 0638/2290] lockd: fix file selection in nlmsvc_cancel_blocked [ Upstream commit 9f27783b4dd235ef3c8dbf69fc6322777450323c ] We currently do a lock_to_openmode call based on the arguments from the NLM_UNLOCK call, but that will always set the fl_type of the lock to F_UNLCK, and the O_RDONLY descriptor is always chosen. Fix it to use the file_lock from the block instead. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9eae99e08e699..4e30f3c509701 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -699,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); if (block != NULL) { - mode = lock_to_openmode(&lock->fl); - vfs_cancel_lock(block->b_file->f_file[mode], - &block->b_call->a_args.lock.fl); + struct file_lock *fl = &block->b_call->a_args.lock.fl; + + mode = lock_to_openmode(fl); + vfs_cancel_lock(block->b_file->f_file[mode], fl); status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } -- GitLab From 371e1c1b326b5de0a12204f217709e2f626c7fe7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Nov 2022 08:57:43 -0500 Subject: [PATCH 0639/2290] trace: Relocate event helper files [ Upstream commit 247c01ff5f8d66e62a404c91733be52fecb8b7f6 ] Steven Rostedt says: > The include/trace/events/ directory should only hold files that > are to create events, not headers that hold helper functions. > > Can you please move them out of include/trace/events/ as that > directory is "special" in the creation of events. Signed-off-by: Chuck Lever Acked-by: Leon Romanovsky Acked-by: Steven Rostedt (Google) Acked-by: Anna Schumaker Stable-dep-of: 638593be55c0 ("NFSD: add CB_RECALL_ANY tracepoints") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 7 +++++++ drivers/infiniband/core/cm_trace.h | 2 +- drivers/infiniband/core/cma_trace.h | 2 +- fs/nfs/nfs4trace.h | 6 +++--- fs/nfs/nfstrace.h | 6 +++--- include/trace/events/rpcgss.h | 2 +- include/trace/events/rpcrdma.h | 4 ++-- include/trace/events/sunrpc.h | 2 +- include/trace/{events => misc}/fs.h | 0 include/trace/{events => misc}/nfs.h | 0 include/trace/{events => misc}/rdma.h | 0 include/trace/{events/sunrpc_base.h => misc/sunrpc.h} | 0 12 files changed, 19 insertions(+), 12 deletions(-) rename include/trace/{events => misc}/fs.h (100%) rename include/trace/{events => misc}/nfs.h (100%) rename include/trace/{events => misc}/rdma.h (100%) rename include/trace/{events/sunrpc_base.h => misc/sunrpc.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 13d1078808bb5..bbfedb0b20938 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10051,6 +10051,7 @@ F: drivers/infiniband/ F: include/rdma/ F: include/trace/events/ib_mad.h F: include/trace/events/ib_umad.h +F: include/trace/misc/rdma.h F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: samples/bpf/ibumad_kern.c @@ -11139,6 +11140,12 @@ F: fs/nfs_common/ F: fs/nfsd/ F: include/linux/lockd/ F: include/linux/sunrpc/ +F: include/trace/events/rpcgss.h +F: include/trace/events/rpcrdma.h +F: include/trace/events/sunrpc.h +F: include/trace/misc/fs.h +F: include/trace/misc/nfs.h +F: include/trace/misc/sunrpc.h F: include/uapi/linux/nfsd/ F: include/uapi/linux/sunrpc/ F: net/sunrpc/ diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h index e9d282679ef15..944d9071245d2 100644 --- a/drivers/infiniband/core/cm_trace.h +++ b/drivers/infiniband/core/cm_trace.h @@ -16,7 +16,7 @@ #include #include -#include +#include /* * enum ib_cm_state, from include/rdma/ib_cm.h diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index e45264267bcc9..47f3c6e4be893 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -15,7 +15,7 @@ #define _TRACE_RDMA_CMA_H #include -#include +#include DECLARE_EVENT_CLASS(cma_fsm_class, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 3fa77ad7258f2..c8a57cfde64b4 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,10 +9,10 @@ #define _TRACE_NFS4_H #include -#include +#include -#include -#include +#include +#include #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8c6cc58679ff8..642f6921852fa 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,9 +11,9 @@ #include #include -#include -#include -#include +#include +#include +#include #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index c9048f3e471bb..3f121eed369e8 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -13,7 +13,7 @@ #include -#include +#include /** ** GSS-API related trace events diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fcd3b3f1020a6..8f461e04e5f09 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include /** ** Event classes diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f48f2ab9d238b..ffe2679a13ced 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,7 +14,7 @@ #include #include -#include +#include TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); diff --git a/include/trace/events/fs.h b/include/trace/misc/fs.h similarity index 100% rename from include/trace/events/fs.h rename to include/trace/misc/fs.h diff --git a/include/trace/events/nfs.h b/include/trace/misc/nfs.h similarity index 100% rename from include/trace/events/nfs.h rename to include/trace/misc/nfs.h diff --git a/include/trace/events/rdma.h b/include/trace/misc/rdma.h similarity index 100% rename from include/trace/events/rdma.h rename to include/trace/misc/rdma.h diff --git a/include/trace/events/sunrpc_base.h b/include/trace/misc/sunrpc.h similarity index 100% rename from include/trace/events/sunrpc_base.h rename to include/trace/misc/sunrpc.h -- GitLab From 4481d72a4b63eb190e71e381050aa2959226e13b Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:45 -0800 Subject: [PATCH 0640/2290] NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker [ Upstream commit a1049eb47f20b9eabf9afb218578fff16b4baca6 ] Refactoring courtesy_client_reaper to generic low memory shrinker so it can be used for other purposes. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d2ed047dfdc89..df3d421025aa8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4362,7 +4362,7 @@ out: } static unsigned long -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { int cnt; struct nfsd_net *nn = container_of(shrink, @@ -4375,7 +4375,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) } static unsigned long -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { return SHRINK_STOP; } @@ -4402,8 +4402,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); } @@ -6171,17 +6171,24 @@ laundromat_main(struct work_struct *laundry) } static void -courtesy_client_reaper(struct work_struct *reaper) +courtesy_client_reaper(struct nfsd_net *nn) { struct list_head reaplist; - struct delayed_work *dwork = to_delayed_work(reaper); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, - nfsd_shrinker_work); nfs4_get_courtesy_client_reaplist(nn, &reaplist); nfs4_process_client_reaplist(&reaplist); } +static void +nfsd4_state_shrinker_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + nfsd_shrinker_work); + + courtesy_client_reaper(nn); +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -8007,7 +8014,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); return 0; -- GitLab From f30f07ba5789ef5c68c2352b996d8a98fefca8a2 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:46 -0800 Subject: [PATCH 0641/2290] NFSD: add support for sending CB_RECALL_ANY [ Upstream commit 3959066b697b5dfbb7141124ae9665337d4bc638 ] Add XDR encode and decode function for CB_RECALL_ANY. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4callback.c | 72 ++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 5 +++ fs/nfsd/xdr4cb.h | 6 ++++ 4 files changed, 84 insertions(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 39989c14c8a1e..4eae2c5af2edf 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p) * 1 Protocol" */ +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); +} + +static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, + size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); +} + /* * nfs_cb_opnum4 * @@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, hdr->nops++; } +/* + * CB_RECALLANY4args + * + * struct CB_RECALLANY4args { + * uint32_t craa_objects_to_keep; + * bitmap4 craa_type_mask; + * }; + */ +static void +encode_cb_recallany4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) +{ + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); + encode_uint32(xdr, ra->ra_keep); + encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); + hdr->nops++; +} + /* * CB_SEQUENCE4args * @@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static void +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfsd4_cb_recall_any *ra; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recallany4args(xdr, &hdr, ra); + encode_cb_nops(&hdr); +} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static int +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); + return status; +} + #ifdef CONFIG_NFSD_PNFS /* * CB_LAYOUTRECALL4args @@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index eadd7f465bf52..e30882f8b8516 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -636,6 +636,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, + NFSPROC4_CLNT_CB_RECALL_ANY, }; /* Returns true iff a is later than b: */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 36c3340c1d54a..510978e602da6 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -896,5 +896,10 @@ struct nfsd4_operation { union nfsd4_op_u *); }; +struct nfsd4_cb_recall_any { + struct nfsd4_callback ra_cb; + u32 ra_keep; + u32 ra_bmval[1]; +}; #endif diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 547cf07cf4e08..0d39af1b00a0f 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -48,3 +48,9 @@ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + 1 + 1) +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) -- GitLab From f28dae54632c5ea45f32ddc6fba494f5efc15007 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:47 -0800 Subject: [PATCH 0642/2290] NFSD: add delegation reaper to react to low memory condition [ Upstream commit 44df6f439a1790a5f602e3842879efa88f346672 ] The delegation reaper is called by nfsd memory shrinker's on the 'count' callback. It scans the client list and sends the courtesy CB_RECALL_ANY to the clients that hold delegations. To avoid flooding the clients with CB_RECALL_ANY requests, the delegation reaper sends only one CB_RECALL_ANY request to each client per 5 seconds. Signed-off-by: Dai Ngo [ cel: moved definition of RCA4_TYPE_MASK_RDATA_DLG ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 88 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfsd/state.h | 5 +++ include/linux/nfs4.h | 13 +++++++ 3 files changed, 102 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index df3d421025aa8..1e030e0309304 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2145,6 +2145,7 @@ static void __free_client(struct kref *k) kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); + kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } @@ -2872,6 +2873,36 @@ static const struct tree_descr client_files[] = { [3] = {""}, }; +static int +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + put_client_renew_locked(clp); + spin_unlock(&nn->client_lock); +} + +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { + .done = nfsd4_cb_recall_any_done, + .release = nfsd4_cb_recall_any_release, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2909,6 +2940,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); + if (!clp->cl_ra) { + free_client(clp); + return NULL; + } + clp->cl_ra_time = 0; + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, + NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } @@ -4364,14 +4403,16 @@ out: static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int cnt; + int count; struct nfsd_net *nn = container_of(shrink, struct nfsd_net, nfsd_client_shrinker); - cnt = atomic_read(&nn->nfsd_courtesy_clients); - if (cnt > 0) + count = atomic_read(&nn->nfsd_courtesy_clients); + if (!count) + count = atomic_long_read(&num_delegations); + if (count) mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); - return (unsigned long)cnt; + return (unsigned long)count; } static unsigned long @@ -6179,6 +6220,44 @@ courtesy_client_reaper(struct nfsd_net *nn) nfs4_process_client_reaplist(&reaplist); } +static void +deleg_reaper(struct nfsd_net *nn) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + struct list_head cblist; + + INIT_LIST_HEAD(&cblist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state != NFSD4_ACTIVE || + list_empty(&clp->cl_delegations) || + atomic_read(&clp->cl_delegs_in_recall) || + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || + (ktime_get_boottime_seconds() - + clp->cl_ra_time < 5)) { + continue; + } + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ + atomic_inc(&clp->cl_rpc_users); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&cblist)) { + clp = list_first_entry(&cblist, struct nfs4_client, + cl_ra_cblist); + list_del_init(&clp->cl_ra_cblist); + clp->cl_ra->ra_keep = 0; + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + nfsd4_run_cb(&clp->cl_ra->ra_cb); + } +} + static void nfsd4_state_shrinker_worker(struct work_struct *work) { @@ -6187,6 +6266,7 @@ nfsd4_state_shrinker_worker(struct work_struct *work) nfsd_shrinker_work); courtesy_client_reaper(nn); + deleg_reaper(nn); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e30882f8b8516..e94634d305912 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -368,6 +368,7 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) +#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; @@ -411,6 +412,10 @@ struct nfs4_client { unsigned int cl_state; atomic_t cl_delegs_in_recall; + + struct nfsd4_cb_recall_any *cl_ra; + time64_t cl_ra_time; + struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d04b6a5964c4..730003c4f4af4 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -732,4 +732,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif -- GitLab From 7b2b8a6c75f0c0175f626d61a74e4f7f75d38df4 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 16 Nov 2022 19:44:48 -0800 Subject: [PATCH 0643/2290] NFSD: add CB_RECALL_ANY tracepoints [ Upstream commit 638593be55c0b37a1930038460a9918215d5c24b ] Add tracepoints to trace start and end of CB_RECALL_ANY operation. Signed-off-by: Dai Ngo [ cel: added show_rca_mask() macro ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 ++ fs/nfsd/trace.h | 50 ++++++++++++++++++++++++++++++++++++++++ include/trace/misc/nfs.h | 12 ++++++++++ 3 files changed, 64 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1e030e0309304..d8829fa53fdaa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2877,6 +2877,7 @@ static int nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, struct rpc_task *task) { + trace_nfsd_cb_recall_any_done(cb, task); switch (task->tk_status) { case -NFS4ERR_DELAY: rpc_delay(task, 2 * HZ); @@ -6254,6 +6255,7 @@ deleg_reaper(struct nfsd_net *nn) list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d261a06b61408..4183819ea0829 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,9 +9,12 @@ #define _NFSD_TRACE_H #include +#include +#include #include "export.h" #include "nfsfh.h" +#include "xdr4.h" #define NFSD_TRACE_PROC_RES_FIELDS \ __field(unsigned int, netns_ino) \ @@ -1492,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash, __entry->count, __entry->status) ); +TRACE_EVENT(nfsd_cb_recall_any, + TP_PROTO( + const struct nfsd4_cb_recall_any *ra + ), + TP_ARGS(ra), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, keep) + __field(unsigned long, bmval0) + __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot; + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id; + __entry->keep = ra->ra_keep; + __entry->bmval0 = ra->ra_bmval[0]; + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr, + ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen); + ), + TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->keep, show_rca_mask(__entry->bmval0) + ) +); + DECLARE_EVENT_CLASS(nfsd_cb_done_class, TP_PROTO( const stateid_t *stp, @@ -1531,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); +TRACE_EVENT(nfsd_cb_recall_any_done, + TP_PROTO( + const struct nfsd4_callback *cb, + const struct rpc_task *task + ), + TP_ARGS(cb, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + ), + TP_fast_assign( + __entry->status = task->tk_status; + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot; + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->status + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 09ffdbb04134d..0d9d48dca38a8 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -360,6 +360,18 @@ TRACE_DEFINE_ENUM(IOMODE_ANY); { IOMODE_RW, "RW" }, \ { IOMODE_ANY, "ANY" }) +#define show_rca_mask(x) \ + __print_flags(x, "|", \ + { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \ + { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" }) + #define show_nfs4_seq4_status(x) \ __print_flags(x, "|", \ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ -- GitLab From eb73733124305ce47d86d74fc3610ea7a4e55260 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 26 Nov 2022 15:55:30 -0500 Subject: [PATCH 0644/2290] NFSD: Use only RQ_DROPME to signal the need to drop a reply [ Upstream commit 9315564747cb6a570e99196b3a4880fb817635fd ] Clean up: NFSv2 has the only two usages of rpc_drop_reply in the NFSD code base. Since NFSv2 is going away at some point, replace these in order to simplify the "drop this reply?" check in nfsd_dispatch(). Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsproc.c | 4 ++-- fs/nfsd/nfssvc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 52fc222c34f26..a5570cf75f3fd 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + __set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + __set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6f4a38f5ab0ce..0c75636054a54 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1071,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); - if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) + if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) -- GitLab From 5c6c2fb3c12f7d7bb7f04259878ac965a8ea2d2d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 12:48:59 -0800 Subject: [PATCH 0645/2290] NFSD: Avoid clashing function prototypes [ Upstream commit e78e274eb22d966258a3845acc71d3c5b8ee2ea8 ] When built with Control Flow Integrity, function prototypes between caller and function declaration must match. These mismatches are visible at compile time with the new -Wcast-function-type-strict in Clang[1]. There were 97 warnings produced by NFS. For example: fs/nfsd/nfs4xdr.c:2228:17: warning: cast from '__be32 (*)(struct nfsd4_compoundargs *, struct nfsd4_access *)' (aka 'unsigned int (*)(struct nfsd4_compoundargs *, struct nfsd4_access *)') to 'nfsd4_dec' (aka 'unsigned int (*)(struct nfsd4_compoundargs *, void *)') converts to incompatible function type [-Wcast-function-type-strict] [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The enc/dec callbacks were defined as passing "void *" as the second argument, but were being implicitly cast to a new type. Replace the argument with union nfsd4_op_u, and perform explicit member selection in the function body. There are no resulting binary differences. Changes were made mechanically using the following Coccinelle script, with minor by-hand fixes for members that didn't already match their existing argument name: @find@ identifier func; type T, opsT; identifier ops, N; @@ opsT ops[] = { [N] = (T) func, }; @already_void@ identifier find.func; identifier name; @@ func(..., -void +union nfsd4_op_u *name) { ... } @proto depends on !already_void@ identifier find.func; type T; identifier name; position p; @@ func@p(..., T name ) { ... } @script:python get_member@ type_name << proto.T; member; @@ coccinelle.member = cocci.make_ident(type_name.split("_", 1)[1].split(' ',1)[0]) @convert@ identifier find.func; type proto.T; identifier proto.name; position proto.p; identifier get_member.member; @@ func@p(..., - T name + union nfsd4_op_u *u ) { + T name = &u->member; ... } @cast@ identifier find.func; type T, opsT; identifier ops, N; @@ opsT ops[] = { [N] = - (T) func, }; Cc: Chuck Lever Cc: Jeff Layton Cc: Gustavo A. R. Silva Cc: linux-nfs@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 632 +++++++++++++++++++++++++++------------------- 1 file changed, 377 insertions(+), 255 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 51a598ee68fe1..597f14a80512f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, - struct nfsd4_access *access) + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) return nfserr_bad_xdr; return nfs_ok; } static __be32 -nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) return nfserr_bad_xdr; return nfsd4_decode_stateid4(argp, &close->cl_stateid); @@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) static __be32 -nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) @@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit } static __be32 -nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; __be32 *p, status; memset(create, 0, sizeof(*create)); @@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } static inline __be32 -nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_delegreturn *dr = &u->delegreturn; return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 -nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; memset(getattr, 0, sizeof(*getattr)); return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, ARRAY_SIZE(getattr->ga_bmval)); } static __be32 -nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) +nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; memset(link, 0, sizeof(*link)); return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } @@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; memset(lock, 0, sizeof(*lock)); if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) return nfserr_bad_xdr; @@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; memset(lockt, 0, sizeof(*lockt)); if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) return nfserr_bad_xdr; @@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) } static __be32 -nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; __be32 status; if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) @@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) } static __be32 -nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lookup *lookup = &u->lookup; return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } @@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; __be32 status; u32 dummy; @@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) } static __be32 -nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *open_conf = &u->open_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con } static __be32 -nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *open_down = &u->open_downgrade; __be32 status; memset(open_down, 0, sizeof(*open_down)); @@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d } static __be32 -nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_putfh *putfh = &u->putfh; __be32 *p; if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) @@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { if (argp->minorversion == 0) return nfs_ok; @@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) } static __be32 -nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; __be32 status; memset(read, 0, sizeof(*read)); @@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) } static __be32 -nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; __be32 status; memset(readdir, 0, sizeof(*readdir)); @@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read } static __be32 -nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 -nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; __be32 status; memset(rename, 0, sizeof(*rename)); @@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename } static __be32 -nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + clientid_t *clientid = &u->renew; return nfsd4_decode_clientid4(argp, clientid); } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; secinfo->si_exp = NULL; return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; __be32 status; memset(setattr, 0, sizeof(*setattr)); @@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta } static __be32 -nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setclientid *setclientid = &u->setclientid; __be32 *p, status; memset(setclientid, 0, sizeof(*setclientid)); @@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient } static __be32 -nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s /* Also used for NVERIFY */ static __be32 -nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_verify *verify = &u->verify; __be32 *p, status; memset(verify, 0, sizeof(*verify)); @@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify } static __be32 -nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; __be32 status; status = nfsd4_decode_stateid4(argp, &write->wr_stateid); @@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) } static __be32 -nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; __be32 status; if (argp->minorversion >= 1) @@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel return nfs_ok; } -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; memset(bc, 0, sizeof(*bc)); if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) return nfserr_bad_xdr; return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); } -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; u32 use_conn_in_rdma_mode; __be32 status; @@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; __be32 status; memset(exid, 0, sizeof(*exid)); @@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; __be32 status; memset(sess, 0, sizeof(*sess)); @@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_session *destroy_session) + union nfsd4_op_u *u) { + struct nfsd4_destroy_session *destroy_session = &u->destroy_session; return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - struct nfsd4_free_stateid *free_stateid) + union nfsd4_op_u *u) { + struct nfsd4_free_stateid *free_stateid = &u->free_stateid; return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; __be32 status; memset(gdev, 0, sizeof(*gdev)); @@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; __be32 *p, status; memset(lcp, 0, sizeof(*lcp)); @@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; __be32 status; memset(lgp, 0, sizeof(*lgp)); @@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; memset(lrp, 0, sizeof(*lrp)); if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) return nfserr_bad_xdr; @@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, #endif /* CONFIG_NFSD_PNFS */ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; @@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; __be32 *p, status; status = nfsd4_decode_sessionid4(argp, &seq->sessionid); @@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; __be32 status; u32 i; @@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta } static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_clientid *dc) + union nfsd4_op_u *u) { + struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; return nfsd4_decode_clientid4(argp, &dc->clientid); } static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, - struct nfsd4_reclaim_complete *rc) + union nfsd4_op_u *u) { + struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) return nfserr_bad_xdr; return nfs_ok; @@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - struct nfsd4_fallocate *fallocate) + union nfsd4_op_u *u) { + struct nfsd4_fallocate *fallocate = &u->allocate; __be32 status; status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); @@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; __be32 status; @@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) static __be32 nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; memset(cn, 0, sizeof(*cn)); @@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; os->count = 0; os->status = 0; return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 status; status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); @@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_clone *clone = &u->clone; __be32 status; status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); @@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) */ static __be32 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; __be32 status; u32 maxcount; @@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; u32 flags, maxcount, size; __be32 status; @@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; u32 maxcount; memset(listxattrs, 0, sizeof(*listxattrs)); @@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 -nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfs_ok; } static __be32 -nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfserr_notsupp; } -typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); static const nfsd4_dec nfsd4_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, + [OP_ACCESS] = nfsd4_decode_access, + [OP_CLOSE] = nfsd4_decode_close, + [OP_COMMIT] = nfsd4_decode_commit, + [OP_CREATE] = nfsd4_decode_create, + [OP_DELEGPURGE] = nfsd4_decode_notsupp, + [OP_DELEGRETURN] = nfsd4_decode_delegreturn, + [OP_GETATTR] = nfsd4_decode_getattr, + [OP_GETFH] = nfsd4_decode_noop, + [OP_LINK] = nfsd4_decode_link, + [OP_LOCK] = nfsd4_decode_lock, + [OP_LOCKT] = nfsd4_decode_lockt, + [OP_LOCKU] = nfsd4_decode_locku, + [OP_LOOKUP] = nfsd4_decode_lookup, + [OP_LOOKUPP] = nfsd4_decode_noop, + [OP_NVERIFY] = nfsd4_decode_verify, + [OP_OPEN] = nfsd4_decode_open, + [OP_OPENATTR] = nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, + [OP_PUTFH] = nfsd4_decode_putfh, + [OP_PUTPUBFH] = nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = nfsd4_decode_noop, + [OP_READ] = nfsd4_decode_read, + [OP_READDIR] = nfsd4_decode_readdir, + [OP_READLINK] = nfsd4_decode_noop, + [OP_REMOVE] = nfsd4_decode_remove, + [OP_RENAME] = nfsd4_decode_rename, + [OP_RENEW] = nfsd4_decode_renew, + [OP_RESTOREFH] = nfsd4_decode_noop, + [OP_SAVEFH] = nfsd4_decode_noop, + [OP_SECINFO] = nfsd4_decode_secinfo, + [OP_SETATTR] = nfsd4_decode_setattr, + [OP_SETCLIENTID] = nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = nfsd4_decode_verify, + [OP_WRITE] = nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, - [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_decode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, + [OP_LAYOUTGET] = nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, - [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, + [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_decode_sequence, + [OP_SET_SSV] = nfsd4_decode_notsupp, + [OP_TEST_STATEID] = nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, /* new operations for NFSv4.2 */ - [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, - [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + [OP_ALLOCATE] = nfsd4_decode_fallocate, + [OP_COPY] = nfsd4_decode_copy, + [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, + [OP_DEALLOCATE] = nfsd4_decode_fallocate, + [OP_IO_ADVISE] = nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, + [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, + [OP_READ_PLUS] = nfsd4_decode_read, + [OP_SEEK] = nfsd4_decode_seek, + [OP_WRITE_SAME] = nfsd4_decode_notsupp, + [OP_CLONE] = nfsd4_decode_clone, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, - [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, - [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, + [OP_GETXATTR] = nfsd4_decode_getxattr, + [OP_SETXATTR] = nfsd4_decode_setxattr, + [OP_LISTXATTRS] = nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_decode_removexattr, }; static inline bool @@ -3643,8 +3702,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) } static __be32 -nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3656,8 +3717,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ return 0; } -static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3673,8 +3736,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, } static __be32 -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); @@ -3682,8 +3747,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 -nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3696,8 +3763,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3710,8 +3779,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = resp->xdr; @@ -3720,8 +3791,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct svc_fh **fhpp = &u->getfh; struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; @@ -3775,8 +3848,10 @@ again: } static __be32 -nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; struct xdr_stream *xdr = resp->xdr; if (!nfserr) @@ -3788,8 +3863,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo } static __be32 -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) @@ -3798,8 +3875,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l } static __be32 -nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); @@ -3807,8 +3886,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 -nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3821,8 +3902,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3915,16 +3998,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } static __be32 -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *oc = &u->open_confirm; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *od = &u->open_downgrade; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); @@ -4023,8 +4110,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; @@ -4065,8 +4153,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readlink *readlink = &u->readlink; __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; @@ -4110,8 +4200,10 @@ out_err: } static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; int maxcount; int bytes_left; loff_t offset; @@ -4201,8 +4293,10 @@ err_no_verf: } static __be32 -nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4214,8 +4308,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4297,8 +4393,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); @@ -4306,8 +4403,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo_no_name *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); @@ -4318,8 +4416,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, * regardless of the error status. */ static __be32 -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4342,8 +4442,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4366,8 +4468,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n } static __be32 -nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4383,8 +4487,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; @@ -4461,8 +4566,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4514,8 +4620,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4537,8 +4644,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_test_stateid *test_stateid) + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4558,8 +4666,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; @@ -4611,8 +4720,9 @@ toosmall: static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4638,8 +4748,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4659,8 +4770,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4745,8 +4857,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy *copy) + union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, @@ -4762,8 +4875,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4813,8 +4927,9 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; int starting_len = xdr->buf->len; @@ -4850,8 +4965,9 @@ out: static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4885,8 +5001,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_seek *seek) + union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 *p; p = xdr_reserve_space(resp->xdr, 4 + 8); @@ -4897,7 +5014,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *p) { return nfserr; } @@ -4948,8 +5066,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p, err; @@ -4972,8 +5091,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5013,8 +5133,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; @@ -5124,8 +5245,9 @@ out: static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5137,7 +5259,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); +typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); /* * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 @@ -5145,93 +5267,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * done in the decoding phase. */ static const nfsd4_enc nfsd4_enc_ops[] = { - [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, - [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, - [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, - [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, - [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, - [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, - [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, - [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, - [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, - [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, - [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, - [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, - [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_READ] = (nfsd4_enc)nfsd4_encode_read, - [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, - [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, - [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, - [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, - [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, - [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, - [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, - [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ACCESS] = nfsd4_encode_access, + [OP_CLOSE] = nfsd4_encode_close, + [OP_COMMIT] = nfsd4_encode_commit, + [OP_CREATE] = nfsd4_encode_create, + [OP_DELEGPURGE] = nfsd4_encode_noop, + [OP_DELEGRETURN] = nfsd4_encode_noop, + [OP_GETATTR] = nfsd4_encode_getattr, + [OP_GETFH] = nfsd4_encode_getfh, + [OP_LINK] = nfsd4_encode_link, + [OP_LOCK] = nfsd4_encode_lock, + [OP_LOCKT] = nfsd4_encode_lockt, + [OP_LOCKU] = nfsd4_encode_locku, + [OP_LOOKUP] = nfsd4_encode_noop, + [OP_LOOKUPP] = nfsd4_encode_noop, + [OP_NVERIFY] = nfsd4_encode_noop, + [OP_OPEN] = nfsd4_encode_open, + [OP_OPENATTR] = nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, + [OP_PUTFH] = nfsd4_encode_noop, + [OP_PUTPUBFH] = nfsd4_encode_noop, + [OP_PUTROOTFH] = nfsd4_encode_noop, + [OP_READ] = nfsd4_encode_read, + [OP_READDIR] = nfsd4_encode_readdir, + [OP_READLINK] = nfsd4_encode_readlink, + [OP_REMOVE] = nfsd4_encode_remove, + [OP_RENAME] = nfsd4_encode_rename, + [OP_RENEW] = nfsd4_encode_noop, + [OP_RESTOREFH] = nfsd4_encode_noop, + [OP_SAVEFH] = nfsd4_encode_noop, + [OP_SECINFO] = nfsd4_encode_secinfo, + [OP_SETATTR] = nfsd4_encode_setattr, + [OP_SETCLIENTID] = nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, + [OP_VERIFY] = nfsd4_encode_noop, + [OP_WRITE] = nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, /* NFSv4.1 operations */ - [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = nfsd4_encode_noop, + [OP_FREE_STATEID] = nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_encode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = nfsd4_encode_noop, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, + [OP_LAYOUTGET] = nfsd4_encode_noop, + [OP_LAYOUTRETURN] = nfsd4_encode_noop, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, - [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_encode_sequence, + [OP_SET_SSV] = nfsd4_encode_noop, + [OP_TEST_STATEID] = nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, /* NFSv4.2 operations */ - [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, - [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, - [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ALLOCATE] = nfsd4_encode_noop, + [OP_COPY] = nfsd4_encode_copy, + [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, + [OP_DEALLOCATE] = nfsd4_encode_noop, + [OP_IO_ADVISE] = nfsd4_encode_noop, + [OP_LAYOUTERROR] = nfsd4_encode_noop, + [OP_LAYOUTSTATS] = nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, + [OP_READ_PLUS] = nfsd4_encode_read_plus, + [OP_SEEK] = nfsd4_encode_seek, + [OP_WRITE_SAME] = nfsd4_encode_noop, + [OP_CLONE] = nfsd4_encode_noop, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, - [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, - [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, + [OP_GETXATTR] = nfsd4_encode_getxattr, + [OP_SETXATTR] = nfsd4_encode_setxattr, + [OP_LISTXATTRS] = nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_encode_removexattr, }; /* -- GitLab From ce606d5334c2abd772bac18c5ee83f3dd82f2a11 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 7 Jan 2023 10:15:35 -0500 Subject: [PATCH 0646/2290] NFSD: Use set_bit(RQ_DROPME) [ Upstream commit 5304930dbae82d259bcf7e5611db7c81e7a42eff ] The premise that "Once an svc thread is scheduled and executing an RPC, no other processes will touch svc_rqst::rq_flags" is false. svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd threads when determining which thread to wake up next. Fixes: 9315564747cb ("NFSD: Use only RQ_DROPME to signal the need to drop a reply") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3fd..9744443c39652 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } -- GitLab From c479755cb80a85bbd7569fa7a7e133a66f792a31 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 11 Jan 2023 12:17:09 -0800 Subject: [PATCH 0647/2290] NFSD: register/unregister of nfsd-client shrinker at nfsd startup/shutdown time [ Upstream commit f385f7d244134246f984975ed34cd75f77de479f ] Currently the nfsd-client shrinker is registered and unregistered at the time the nfsd module is loaded and unloaded. The problem with this is the shrinker is being registered before all of the relevant fields in nfsd_net are initialized when nfsd is started. This can lead to an oops when memory is low and the shrinker is called while nfsd is not running. This patch moves the register/unregister of nfsd-client shrinker from module load/unload time to nfsd startup/shutdown time. Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition") Reported-by: Mike Galbraith Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 22 +++++++++++----------- fs/nfsd/nfsctl.c | 7 +------ fs/nfsd/nfsd.h | 6 ++---- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d8829fa53fdaa..02a3629ba307f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4422,7 +4422,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4444,16 +4444,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -8099,8 +8089,17 @@ static int nfs4_state_create_net(struct net *net) INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8193,6 +8192,7 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a8884d0b4638c..76a60e7a75097 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1452,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1464,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1481,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 93b42ef9ed91b..fa0144a742678 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) -- GitLab From f3ea5ec83d1a827f074b2b660749817e0bf2b23e Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 11 Jan 2023 16:06:51 -0800 Subject: [PATCH 0648/2290] NFSD: replace delayed_work with work_struct for nfsd_client_shrinker [ Upstream commit 7c24fa225081f31bc6da6a355c1ba801889ab29a ] Since nfsd4_state_shrinker_count always calls mod_delayed_work with 0 delay, we can replace delayed_work with work_struct to save some space and overhead. Also add the call to cancel_work after unregister the shrinker in nfs4_state_shutdown_net. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4state.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285bb..51a4b7885cae2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -195,7 +195,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 02a3629ba307f..a6fcde4f7b13a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4412,7 +4412,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) if (!count) count = atomic_long_read(&num_delegations); if (count) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } @@ -6253,8 +6253,7 @@ deleg_reaper(struct nfsd_net *nn) static void nfsd4_state_shrinker_worker(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); @@ -8086,7 +8085,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; @@ -8193,6 +8192,7 @@ nfs4_state_shutdown_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); -- GitLab From 56587affe21c5cd806523a89efd8da5b49872a72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Feb 2023 07:50:08 -0500 Subject: [PATCH 0649/2290] nfsd: don't destroy global nfs4_file table in per-net shutdown [ Upstream commit 4102db175b5d884d133270fdbd0e59111ce688fc ] The nfs4_file table is global, so shutting it down when a containerized nfsd is shut down is wrong and can lead to double-frees. Tear down the nfs4_file_rhltable in nfs4_state_shutdown instead of nfs4_state_shutdown_net. Fixes: d47b295e8d76 ("NFSD: Use rhashtable for managing nfs4_file objects") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2169017 Reported-by: JianHong Yin Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a6fcde4f7b13a..b9d694ec25d19 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8212,7 +8212,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif @@ -8222,6 +8221,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void -- GitLab From e58f2862e9fe500b073d20f94e73abc52fb70634 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:39 +0100 Subject: [PATCH 0650/2290] arm64: efi: Limit allocations to 48-bit addressable physical region From: Ard Biesheuvel [ Commit a37dac5c5dcfe0f1fd58513c16cdbc280a47f628 upstream ] The UEFI spec does not mention or reason about the configured size of the virtual address space at all, but it does mention that all memory should be identity mapped using a page size of 4 KiB. This means that a LPA2 capable system that has any system memory outside of the 48-bit addressable physical range and follows the spec to the letter may serve page allocation requests from regions of memory that the kernel cannot access unless it was built with LPA2 support and enables it at runtime. So let's ensure that all page allocations are limited to the 48-bit range. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/efi.h | 1 + drivers/firmware/efi/libstub/alignedmem.c | 2 ++ drivers/firmware/efi/libstub/arm64-stub.c | 5 +++-- drivers/firmware/efi/libstub/efistub.h | 4 ++++ drivers/firmware/efi/libstub/mem.c | 2 ++ drivers/firmware/efi/libstub/randomalloc.c | 2 +- 6 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 62c846be2d76a..a75c0772ecfca 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si) } #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) /* * On ARM systems, virtually remapped UEFI runtime services are set up in two diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 174832661251e..6b83c492c3b82 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -29,6 +29,8 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, efi_status_t status; int slack; + max = min(max, EFI_ALLOC_LIMIT); + if (align < EFI_ALLOC_ALIGN) align = EFI_ALLOC_ALIGN; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 08f46c072da56..40275c3131c8e 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -191,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (status != EFI_SUCCESS) { if (!check_image_region((u64)_text, kernel_memsize)) { efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); - } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { + } else if (IS_ALIGNED((u64)_text, min_kimg_align) && + (u64)_end < EFI_ALLOC_LIMIT) { /* * Just execute from wherever we were loaded by the - * UEFI PE/COFF loader if the alignment is suitable. + * UEFI PE/COFF loader if the placement is suitable. */ *image_addr = (u64)_text; *reserve_size = 0; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index ab505b07e626b..002f02a6d3598 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -29,6 +29,10 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#ifndef EFI_ALLOC_LIMIT +#define EFI_ALLOC_LIMIT ULONG_MAX +#endif + extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 03d147f17185b..4f1fa302234d8 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -89,6 +89,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, efi_physical_addr_t alloc_addr; efi_status_t status; + max = min(max, EFI_ALLOC_LIMIT); + if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) return efi_allocate_pages_aligned(size, addr, max, EFI_ALLOC_ALIGN, diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index ec44bb7e092fa..1692d19ae80f0 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -29,7 +29,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)ULONG_MAX); + (u64)EFI_ALLOC_LIMIT); if (region_end < size) return 0; -- GitLab From 33d064aecd89846d5cf284ab75eeb9098b5ff49e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:40 +0100 Subject: [PATCH 0651/2290] efi: efivars: prevent double registration From: Johan Hovold [ Commit 0217a40d7ba6e71d7f3422fbe89b436e8ee7ece7 upstream ] Add the missing sanity check to efivars_register() so that it is no longer possible to override an already registered set of efivar ops (without first deregistering them). This can help debug initialisation ordering issues where drivers have so far unknowingly been relying on overriding the generic ops. Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/vars.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 0ba9f18312f5b..4ca256bcd6971 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject) { + int rv; + if (down_interruptible(&efivars_lock)) return -EINTR; + if (__efivars) { + pr_warn("efivars already registered\n"); + rv = -EBUSY; + goto out; + } + efivars->ops = ops; efivars->kobject = kobject; __efivars = efivars; pr_info("Registered efivars operations\n"); - + rv = 0; +out: up(&efivars_lock); - return 0; + return rv; } EXPORT_SYMBOL_GPL(efivars_register); -- GitLab From f0acafd6f79fa6068b7fc4af7980ac9bbd14f1d1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:41 +0100 Subject: [PATCH 0652/2290] x86/efistub: Simplify and clean up handover entry code From: Ard Biesheuvel [ Commit df9215f15206c2a81909ccf60f21d170801dce38 upstream ] Now that the EFI entry code in assembler is only used by the optional and deprecated EFI handover protocol, and given that the EFI stub C code no longer returns to it, most of it can simply be dropped. While at it, clarify the symbol naming, by merging efi_main() and efi_stub_entry(), making the latter the shared entry point for all different boot modes that enter via the EFI stub. The efi32_stub_entry() and efi64_stub_entry() names are referenced explicitly by the tooling that populates the setup header, so these must be retained, but can be emitted as aliases of efi_stub_entry() where appropriate. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-5-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/boot.rst | 2 +- arch/x86/boot/compressed/efi_mixed.S | 22 ++++++++++++---------- arch/x86/boot/compressed/head_32.S | 11 ----------- arch/x86/boot/compressed/head_64.S | 12 ++---------- drivers/firmware/efi/libstub/x86-stub.c | 20 ++++++++++++++++---- 5 files changed, 31 insertions(+), 36 deletions(-) diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index 894a198970055..bac3789f3e8fa 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware. The function prototype for the handover entry point looks like this:: - efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) + efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp) 'handle' is the EFI image handle passed to the boot loader by the EFI firmware, 'table' is the EFI system table - these are the first two diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8b02e507d3bb0..d05f0250bbbc2 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -26,8 +26,8 @@ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() * is the first thing that runs after switching to long mode. Depending on * whether the EFI handover protocol or the compat entry point was used to - * enter the kernel, it will either branch to the 64-bit EFI handover - * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF + * enter the kernel, it will either branch to the common 64-bit EFI stub + * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a * struct bootparams pointer as the third argument, so the presence of such a * pointer is used to disambiguate. @@ -37,21 +37,23 @@ * | efi32_pe_entry |---->| | | +-----------+--+ * +------------------+ | | +------+----------------+ | * | startup_32 |---->| startup_64_mixed_mode | | - * +------------------+ | | +------+----------------+ V - * | efi32_stub_entry |---->| | | +------------------+ - * +------------------+ +------------+ +---->| efi64_stub_entry | - * +-------------+----+ - * +------------+ +----------+ | - * | startup_64 |<----| efi_main |<--------------+ - * +------------+ +----------+ + * +------------------+ | | +------+----------------+ | + * | efi32_stub_entry |---->| | | | + * +------------------+ +------------+ | | + * V | + * +------------+ +----------------+ | + * | startup_64 |<----| efi_stub_entry |<--------+ + * +------------+ +----------------+ */ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx - jnz efi64_stub_entry + jnz efi_stub_entry +#endif /* * efi_pe_entry uses MS calling convention, which requires 32 bytes of * shadow space on the stack even if all arguments are passed in diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3ecc1bbe971e1..3af4a383615b3 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -150,17 +150,6 @@ SYM_FUNC_START(startup_32) jmp *%eax SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_STUB -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp - movl 8(%esp), %esi /* save boot_params pointer */ - call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ - jmp *%eax -SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fafd0a59f3961..d4ccae574c4f3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -474,19 +474,11 @@ SYM_CODE_START(startup_64) jmp *%rax SYM_CODE_END(startup_64) -#ifdef CONFIG_EFI_STUB -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) .org 0x390 -#endif SYM_FUNC_START(efi64_stub_entry) - and $~0xf, %rsp /* realign the stack */ - movq %rdx, %rbx /* save boot_params pointer */ - call efi_main - movq %rbx,%rsi - leaq rva(startup_64)(%rax), %rax - jmp *%rax + jmp efi_stub_entry SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9422fddfbc8f1..9661d5a5769e5 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -774,9 +774,9 @@ static void __noreturn enter_kernel(unsigned long kernel_addr, * return. On failure, it will exit to the firmware via efi_exit() instead of * returning. */ -asmlinkage unsigned long efi_main(efi_handle_t handle, - efi_system_table_t *sys_table_arg, - struct boot_params *boot_params) +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) { unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; @@ -919,7 +919,19 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, enter_kernel(bzimage_addr, boot_params); fail: - efi_err("efi_main() failed!\n"); + efi_err("efi_stub_entry() failed!\n"); efi_exit(handle, status); } + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +#ifndef CONFIG_EFI_MIXED +extern __alias(efi_stub_entry) +void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + +extern __alias(efi_stub_entry) +void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); +#endif +#endif -- GitLab From 1f3fd81bff03355c3acc8558c3c4da2f2d4e1d18 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:42 +0100 Subject: [PATCH 0653/2290] x86/decompressor: Avoid magic offsets for EFI handover entrypoint From: Ard Biesheuvel [ Commit 12792064587623065250069d1df980e2c9ac3e67 upstream ] The native 32-bit or 64-bit EFI handover protocol entrypoint offset relative to the respective startup_32/64 address is described in boot_params as handover_offset, so that the special Linux/x86 aware EFI loader can find it there. When mixed mode is enabled, this single field has to describe this offset for both the 32-bit and 64-bit entrypoints, so their respective relative offsets have to be identical. Given that startup_32 and startup_64 are 0x200 bytes apart, and the EFI handover entrypoint resides at a fixed offset, the 32-bit and 64-bit versions of those entrypoints must be exactly 0x200 bytes apart as well. Currently, hard-coded fixed offsets are used to ensure this, but it is sufficient to emit the 64-bit entrypoint 0x200 bytes after the 32-bit one, wherever it happens to reside. This allows this code (which is now EFI mixed mode specific) to be moved into efi_mixed.S and out of the startup code in head_64.S. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-6-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 20 +++++++++++++++++++- arch/x86/boot/compressed/head_64.S | 18 ------------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index d05f0250bbbc2..deb36129e3a95 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -146,6 +146,16 @@ SYM_FUNC_START(__efi64_thunk) SYM_FUNC_END(__efi64_thunk) .code32 +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +SYM_FUNC_START(efi32_stub_entry) + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) +#endif + /* * EFI service pointer must be in %edi. * @@ -226,7 +236,7 @@ SYM_FUNC_END(efi_enter32) * stub may still exit and return to the firmware using the Exit() EFI boot * service.] */ -SYM_FUNC_START(efi32_entry) +SYM_FUNC_START_LOCAL(efi32_entry) call 1f 1: pop %ebx @@ -326,6 +336,14 @@ SYM_FUNC_START(efi32_pe_entry) RET SYM_FUNC_END(efi32_pe_entry) +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + .org efi32_stub_entry + 0x200 + .code64 +SYM_FUNC_START_NOALIGN(efi64_stub_entry) + jmp efi_stub_entry +SYM_FUNC_END(efi64_stub_entry) +#endif + .section ".rodata" /* EFI loaded image protocol GUID */ .balign 4 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d4ccae574c4f3..9a0d83b4d266d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -286,17 +286,6 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) - .org 0x190 -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - jmp efi32_entry -SYM_FUNC_END(efi32_stub_entry) -#endif - .code64 .org 0x200 SYM_CODE_START(startup_64) @@ -474,13 +463,6 @@ SYM_CODE_START(startup_64) jmp *%rax SYM_CODE_END(startup_64) -#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - jmp efi_stub_entry -SYM_FUNC_END(efi64_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) -- GitLab From 34378d7ad273ff859c1ed9ab77bb71e55f652b06 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:43 +0100 Subject: [PATCH 0654/2290] x86/efistub: Clear BSS in EFI handover protocol entrypoint From: Ard Biesheuvel [ Commit d7156b986d4cc0657fa6dc05c9fcf51c3d55a0fe upstream ] The so-called EFI handover protocol is value-add from the distros that permits a loader to simply copy a PE kernel image into memory and call an alternative entrypoint that is described by an embedded boot_params structure. Most implementations of this protocol do not bother to check the PE header for minimum alignment, section placement, etc, and therefore also don't clear the image's BSS, or even allocate enough memory for it. Allocating more memory on the fly is rather difficult, but at least clear the BSS region explicitly when entering in this manner, so that the EFI stub code does not get confused by global variables that were not zero-initialized correctly. When booting in mixed mode, this BSS clearing must occur before any global state is created, so clear it in the 32-bit asm entry point. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-7-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 14 +++++++++++++- drivers/firmware/efi/libstub/x86-stub.c | 13 +++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index deb36129e3a95..d6d1b76b594d9 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -148,6 +148,18 @@ SYM_FUNC_END(__efi64_thunk) .code32 #ifdef CONFIG_EFI_HANDOVER_PROTOCOL SYM_FUNC_START(efi32_stub_entry) + call 1f +1: popl %ecx + + /* Clear BSS */ + xorl %eax, %eax + leal (_bss - 1b)(%ecx), %edi + leal (_ebss - 1b)(%ecx), %ecx + subl %edi, %ecx + shrl $2, %ecx + cld + rep stosl + add $0x4, %esp /* Discard return address */ popl %ecx popl %edx @@ -340,7 +352,7 @@ SYM_FUNC_END(efi32_pe_entry) .org efi32_stub_entry + 0x200 .code64 SYM_FUNC_START_NOALIGN(efi64_stub_entry) - jmp efi_stub_entry + jmp efi_handover_entry SYM_FUNC_END(efi64_stub_entry) #endif diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9661d5a5769e5..764bac6b58f96 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -925,12 +925,21 @@ fail: } #ifdef CONFIG_EFI_HANDOVER_PROTOCOL +void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) +{ + extern char _bss[], _ebss[]; + + memset(_bss, 0, _ebss - _bss); + efi_stub_entry(handle, sys_table_arg, boot_params); +} + #ifndef CONFIG_EFI_MIXED -extern __alias(efi_stub_entry) +extern __alias(efi_handover_entry) void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params); -extern __alias(efi_stub_entry) +extern __alias(efi_handover_entry) void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params); #endif -- GitLab From 8ff6d88c0443acdd4199aacb69f1dd4a24120e8e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:45 +0100 Subject: [PATCH 0655/2290] efi/libstub: Add memory attribute protocol definitions From: Evgeniy Baskov [ Commit 79729f26b074a5d2722c27fa76cc45ef721e65cd upstream ] EFI_MEMORY_ATTRIBUTE_PROTOCOL servers as a better alternative to DXE services for setting memory attributes in EFI Boot Services environment. This protocol is better since it is a part of UEFI specification itself and not UEFI PI specification like DXE services. Add EFI_MEMORY_ATTRIBUTE_PROTOCOL definitions. Support mixed mode properly for its calls. Tested-by: Mario Limonciello Signed-off-by: Evgeniy Baskov Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/efi.h | 7 +++++++ drivers/firmware/efi/libstub/efistub.h | 20 ++++++++++++++++++++ include/linux/efi.h | 1 + 3 files changed, 28 insertions(+) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 233ae6986d6f2..522ff2e443b37 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -325,6 +325,13 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) +/* Memory Attribute Protocol */ +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 002f02a6d3598..6f5a1a16db15b 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -419,6 +419,26 @@ union efi_dxe_services_table { } mixed_mode; }; +typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t; + +union efi_memory_attribute_protocol { + struct { + efi_status_t (__efiapi *get_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *); + + efi_status_t (__efiapi *set_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + + efi_status_t (__efiapi *clear_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + }; + struct { + u32 get_memory_attributes; + u32 set_memory_attributes; + u32 clear_memory_attributes; + } mixed_mode; +}; + typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; union efi_uga_draw_protocol { diff --git a/include/linux/efi.h b/include/linux/efi.h index 4e1bfee9675d2..de6d6558a4d30 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -390,6 +390,7 @@ void efi_native_runtime_setup(void); #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) +#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) -- GitLab From 476a48cd37c948b160cc3d5ff5b4d2e711f1ca36 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:46 +0100 Subject: [PATCH 0656/2290] efi/libstub: Add limit argument to efi_random_alloc() From: Ard Biesheuvel [ Commit bc5ddceff4c14494d83449ad45c985e6cd353fce upstream ] x86 will need to limit the kernel memory allocation to the lowest 512 MiB of memory, to match the behavior of the existing bare metal KASLR physical randomization logic. So in preparation for that, add a limit parameter to efi_random_alloc() and wire it up. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-22-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- drivers/firmware/efi/libstub/efistub.h | 2 +- drivers/firmware/efi/libstub/randomalloc.c | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 40275c3131c8e..16377b4521190 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -181,7 +181,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed, - EFI_LOADER_CODE); + EFI_LOADER_CODE, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6f5a1a16db15b..8a343ea1231a2 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -905,7 +905,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type); + int memory_type, unsigned long alloc_limit); efi_status_t efi_random_get_seed(void); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 1692d19ae80f0..ed6f6087a9eac 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -16,7 +16,8 @@ */ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, - unsigned long align_shift) + unsigned long align_shift, + u64 alloc_limit) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)EFI_ALLOC_LIMIT); + alloc_limit); if (region_end < size) return 0; @@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type) + int memory_type, + unsigned long alloc_limit) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align)); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) -- GitLab From 350265a753d8b39e2bb11660f2109c8dd5306b45 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:47 +0100 Subject: [PATCH 0657/2290] x86/efistub: Perform 4/5 level paging switch from the stub From: Ard Biesheuvel [ Commit cb380000dd23cbbf8bd7d023b51896804c1f7e68 upstream ] In preparation for updating the EFI stub boot flow to avoid the bare metal decompressor code altogether, implement the support code for switching between 4 and 5 levels of paging before jumping to the kernel proper. This reuses the newly refactored trampoline that the bare metal decompressor uses, but relies on EFI APIs to allocate 32-bit addressable memory and remap it with the appropriate permissions. Given that the bare metal decompressor will no longer call into the trampoline if the number of paging levels is already set correctly, it is no longer needed to remove NX restrictions from the memory range where this trampoline may end up. Acked-by: Kirill A. Shutemov Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 1 + .../firmware/efi/libstub/efi-stub-helper.c | 2 + drivers/firmware/efi/libstub/efistub.h | 1 + drivers/firmware/efi/libstub/x86-5lvl.c | 95 +++++++++++++++++++ drivers/firmware/efi/libstub/x86-stub.c | 40 +++----- drivers/firmware/efi/libstub/x86-stub.h | 17 ++++ 6 files changed, 130 insertions(+), 26 deletions(-) create mode 100644 drivers/firmware/efi/libstub/x86-5lvl.c create mode 100644 drivers/firmware/efi/libstub/x86-stub.h diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b6e1dcb98a64c..473ef18421db0 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o lib-$(CONFIG_X86) += x86-stub.o +lib-$(CONFIG_X86_64) += x86-5lvl.o lib-$(CONFIG_RISCV) += riscv-stub.o lib-$(CONFIG_LOONGARCH) += loongarch-stub.o diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3d9b2469a0dfd..97744822dd951 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline) efi_loglevel = CONSOLE_LOGLEVEL_QUIET; } else if (!strcmp(param, "noinitrd")) { efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 8a343ea1231a2..4b4055877f3d3 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -33,6 +33,7 @@ #define EFI_ALLOC_LIMIT ULONG_MAX #endif +extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c new file mode 100644 index 0000000000000..479dd445acdcf --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-5lvl.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#include +#include +#include + +#include "efistub.h" +#include "x86-stub.h" + +bool efi_no5lvl; + +static void (*la57_toggle)(void *cr3); + +static const struct desc_struct gdt[] = { + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), +}; + +/* + * Enabling (or disabling) 5 level paging is tricky, because it can only be + * done from 32-bit mode with paging disabled. This means not only that the + * code itself must be running from 32-bit addressable physical memory, but + * also that the root page table must be 32-bit addressable, as programming + * a 64-bit value into CR3 when running in 32-bit mode is not supported. + */ +efi_status_t efi_setup_5level_paging(void) +{ + u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; + efi_status_t status; + u8 *la57_code; + + if (!efi_is_64bit()) + return EFI_SUCCESS; + + /* check for 5 level paging support */ + if (native_cpuid_eax(0) < 7 || + !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return EFI_SUCCESS; + + /* allocate some 32-bit addressable memory for code and a page table */ + status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, + U32_MAX); + if (status != EFI_SUCCESS) + return status; + + la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); + memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); + + /* + * To avoid the need to allocate a 32-bit addressable stack, the + * trampoline uses a LJMP instruction to switch back to long mode. + * LJMP takes an absolute destination address, which needs to be + * fixed up at runtime. + */ + *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; + + efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); + + return EFI_SUCCESS; +} + +void efi_5level_switch(void) +{ + bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; + bool have_la57 = native_read_cr4() & X86_CR4_LA57; + bool need_toggle = want_la57 ^ have_la57; + u64 *pgt = (void *)la57_toggle + PAGE_SIZE; + u64 *cr3 = (u64 *)__native_read_cr3(); + u64 *new_cr3; + + if (!la57_toggle || !need_toggle) + return; + + if (!have_la57) { + /* + * 5 level paging will be enabled, so a root level page needs + * to be allocated from the 32-bit addressable physical region, + * with its first entry referring to the existing hierarchy. + */ + new_cr3 = memset(pgt, 0, PAGE_SIZE); + new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; + } else { + /* take the new root table pointer from the current entry #0 */ + new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); + + /* copy the new root table if it is not 32-bit addressable */ + if ((u64)new_cr3 > U32_MAX) + new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); + } + + native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); + + la57_toggle(new_cr3); +} diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 764bac6b58f96..adaddd38d97d1 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -17,6 +17,7 @@ #include #include "efistub.h" +#include "x86-stub.h" /* Maximum physical address for 64-bit kernel with 4-level paging */ #define MAXMEM_X86_64_4LEVEL (1ull << 46) @@ -212,8 +213,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -static void -adjust_memory_range_protection(unsigned long start, unsigned long size) +void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -267,35 +268,14 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) } } -/* - * Trampoline takes 2 pages and can be loaded in first megabyte of memory - * with its end placed between 128k and 640k where BIOS might start. - * (see arch/x86/boot/compressed/pgtable_64.c) - * - * We cannot find exact trampoline placement since memory map - * can be modified by UEFI, and it can alter the computed address. - */ - -#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) -#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) - extern const u8 startup_32[], startup_64[]; static void setup_memory_protection(unsigned long image_base, unsigned long image_size) { - /* - * Allow execution of possible trampoline used - * for switching between 4- and 5-level page tables - * and relocated kernel image. - */ - - adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, - TRAMPOLINE_PLACEMENT_SIZE); - #ifdef CONFIG_64BIT if (image_base != (unsigned long)startup_32) - adjust_memory_range_protection(image_base, image_size); + efi_adjust_memory_range_protection(image_base, image_size); #else /* * Clear protection flags on a whole range of possible @@ -305,8 +285,8 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size) * need to remove possible protection on relocated image * itself disregarding further relocations. */ - adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); + efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, + KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); #endif } @@ -796,6 +776,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_dxe_table = NULL; } + status = efi_setup_5level_paging(); + if (status != EFI_SUCCESS) { + efi_err("efi_setup_5level_paging() failed!\n"); + goto fail; + } + /* * If the kernel isn't already loaded at a suitable address, * relocate it. @@ -914,6 +900,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } + efi_5level_switch(); + if (IS_ENABLED(CONFIG_X86_64)) bzimage_addr += startup_64 - startup_32; diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h new file mode 100644 index 0000000000000..37c5a36b9d8cf --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +extern void trampoline_32bit_src(void *, bool); +extern const u16 trampoline_ljmp_imm_offset; + +void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); + +#ifdef CONFIG_X86_64 +efi_status_t efi_setup_5level_paging(void); +void efi_5level_switch(void); +#else +static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } +static inline void efi_5level_switch(void) {} +#endif -- GitLab From 5a664585a71c3af82a64aa9b38cadfa02f11c841 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:48 +0100 Subject: [PATCH 0658/2290] x86/decompressor: Factor out kernel decompression and relocation From: Ard Biesheuvel [ Commit 83381519352d6b5b3e429bf72aaab907480cb6b6 upstream ] Factor out the decompressor sequence that invokes the decompressor, parses the ELF and applies the relocations so that it can be called directly from the EFI stub. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-21-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.c | 29 ++++++++++++++++++++++++----- arch/x86/include/asm/boot.h | 8 ++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index e4e3e49fcc374..fb55ac18af6fa 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,11 +330,33 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_total_size = VO__end - VO__text; + static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); extern unsigned char input_data[]; extern unsigned int input_len, output_len; +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)) +{ + unsigned long entry; + + if (!free_mem_ptr) { + free_mem_ptr = (unsigned long)boot_heap; + free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); + } + + if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, + NULL, error) < 0) + return ULONG_MAX; + + entry = parse_elf(outbuf); + handle_relocations(outbuf, output_len, virt_addr); + + return entry; +} + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -354,7 +376,6 @@ extern unsigned int input_len, output_len; */ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { - const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; memptr heap = (memptr)boot_heap; unsigned long needed_size; @@ -457,10 +478,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) #endif debug_putstr("\nDecompressing Linux... "); - __decompress(input_data, input_len, NULL, NULL, output, output_len, - NULL, error); - entry_offset = parse_elf(output); - handle_relocations(output, output_len, virt_addr); + + entry_offset = decompress_kernel(output, virt_addr, error); debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); debug_puthex(entry_offset); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 215d37f7dde8a..b3a7cfb0d99e0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -79,4 +79,12 @@ # define BOOT_STACK_SIZE 0x1000 #endif +#ifndef __ASSEMBLY__ +extern unsigned int output_len; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); +#endif + #endif /* _ASM_X86_BOOT_H */ -- GitLab From 77330c123d7c443936585f25b31d3979876ba1d0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:49 +0100 Subject: [PATCH 0659/2290] x86/efistub: Prefer EFI memory attributes protocol over DXE services From: Ard Biesheuvel [ Commit 11078876b7a6a1b7226344fecab968945c806832 upstream ] Currently, the EFI stub relies on DXE services in some cases to clear non-execute restrictions from page allocations that need to be executable. This is dodgy, because DXE services are not specified by UEFI but by PI, and they are not intended for consumption by OS loaders. However, no alternative existed at the time. Now, there is a new UEFI protocol that should be used instead, so if it exists, prefer it over the DXE services calls. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-18-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 29 ++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index adaddd38d97d1..01af018b93158 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -26,6 +26,7 @@ const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; +static efi_memory_attribute_protocol_t *memattr; static efi_status_t preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) @@ -222,12 +223,18 @@ void efi_adjust_memory_range_protection(unsigned long start, unsigned long rounded_start, rounded_end; unsigned long unprotect_start, unprotect_size; - if (efi_dxe_table == NULL) - return; - rounded_start = rounddown(start, EFI_PAGE_SIZE); rounded_end = roundup(start + size, EFI_PAGE_SIZE); + if (memattr != NULL) { + efi_call_proto(memattr, clear_memory_attributes, rounded_start, + rounded_end - rounded_start, EFI_MEMORY_XP); + return; + } + + if (efi_dxe_table == NULL) + return; + /* * Don't modify memory region attributes, they are * already suitable, to lower the possibility to @@ -758,6 +765,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params) { + efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; @@ -769,13 +777,18 @@ void __noreturn efi_stub_entry(efi_handle_t handle, if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); - efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); - if (efi_dxe_table && - efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { - efi_warn("Ignoring DXE services table: invalid signature\n"); - efi_dxe_table = NULL; + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { + efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); + if (efi_dxe_table && + efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { + efi_warn("Ignoring DXE services table: invalid signature\n"); + efi_dxe_table = NULL; + } } + /* grab the memory attributes protocol if it exists */ + efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr); + status = efi_setup_5level_paging(); if (status != EFI_SUCCESS) { efi_err("efi_setup_5level_paging() failed!\n"); -- GitLab From fff7614f576f802fb0f4ff169cb251c180ce377e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:50 +0100 Subject: [PATCH 0660/2290] x86/efistub: Perform SNP feature test while running in the firmware From: Ard Biesheuvel [ Commit 31c77a50992e8dd136feed7b67073bb5f1f978cc upstream ] Before refactoring the EFI stub boot flow to avoid the legacy bare metal decompressor, duplicate the SNP feature check in the EFI stub before handing over to the kernel proper. The SNP feature check can be performed while running under the EFI boot services, which means it can force the boot to fail gracefully and return an error to the bootloader if the loaded kernel does not implement support for all the features that the hypervisor enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-23-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/sev.c | 112 ++++++++++++++---------- arch/x86/include/asm/sev.h | 5 ++ drivers/firmware/efi/libstub/x86-stub.c | 17 ++++ 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 9c91cc40f4565..8b21c57bc4700 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -327,20 +327,25 @@ static void enforce_vmpl0(void) */ #define SNP_FEATURES_PRESENT (0) +u64 snp_get_unsupported_features(u64 status) +{ + if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) + return 0; + + return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; +} + void snp_check_features(void) { u64 unsupported; - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) - return; - /* * Terminate the boot if hypervisor has enabled any feature lacking * guest side implementation. Pass on the unsupported features mask through * EXIT_INFO_2 of the GHCB protocol so that those features can be reported * as part of the guest boot failure. */ - unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + unsupported = snp_get_unsupported_features(sev_status); if (unsupported) { if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); @@ -350,35 +355,22 @@ void snp_check_features(void) } } -void sev_enable(struct boot_params *bp) +/* + * sev_check_cpu_support - Check for SEV support in the CPU capabilities + * + * Returns < 0 if SEV is not supported, otherwise the position of the + * encryption bit in the page table descriptors. + */ +static int sev_check_cpu_support(void) { unsigned int eax, ebx, ecx, edx; - struct msr m; - bool snp; - - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 to ensure that uninitialized values from - * buggy bootloaders aren't propagated. - */ - if (bp) - bp->cc_blob_address = 0; - - /* - * Do an initial SEV capability check before snp_init() which - * loads the CPUID page and the same checks afterwards are done - * without the hypervisor and are trustworthy. - * - * If the HV fakes SEV support, the guest will crash'n'burn - * which is good enough. - */ /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); if (eax < 0x8000001f) - return; + return -ENODEV; /* * Check for the SME/SEV feature: @@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp) native_cpuid(&eax, &ebx, &ecx, &edx); /* Check whether SEV is supported */ if (!(eax & BIT(1))) + return -ENODEV; + + return ebx & 0x3f; +} + +void sev_enable(struct boot_params *bp) +{ + struct msr m; + int bitpos; + bool snp; + + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + if (sev_check_cpu_support() < 0) return; /* @@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp) /* Now repeat the checks with the SNP CPUID table. */ - /* Recheck the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - - /* - * Recheck for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Check whether SEV is supported */ - if (!(eax & BIT(1))) { + bitpos = sev_check_cpu_support(); + if (bitpos < 0) { if (snp) error("SEV-SNP support indicated by CC blob, but not CPUID."); return; @@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp) if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); - sme_me_mask = BIT_ULL(ebx & 0x3f); + sme_me_mask = BIT_ULL(bitpos); +} + +/* + * sev_get_status - Retrieve the SEV status mask + * + * Returns 0 if the CPU is not SEV capable, otherwise the value of the + * AMD64_SEV MSR. + */ +u64 sev_get_status(void) +{ + struct msr m; + + if (sev_check_cpu_support() < 0) + return 0; + + boot_rdmsr(MSR_AMD64_SEV, &m); + return m.q; } /* Search for Confidential Computing blob in the EFI config table. */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7ca5c9ec8b52e..e231638ba19a4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -202,6 +202,8 @@ void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -225,6 +227,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } + +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 01af018b93158..8d3ce383bcbb9 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "efistub.h" #include "x86-stub.h" @@ -747,6 +748,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static bool have_unsupported_snp_features(void) +{ + u64 unsupported; + + unsupported = snp_get_unsupported_features(sev_get_status()); + if (unsupported) { + efi_err("Unsupported SEV-SNP features detected: 0x%llx\n", + unsupported); + return true; + } + return false; +} + static void __noreturn enter_kernel(unsigned long kernel_addr, struct boot_params *boot_params) { @@ -777,6 +791,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); + if (have_unsupported_snp_features()) + efi_exit(handle, EFI_UNSUPPORTED); + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); if (efi_dxe_table && -- GitLab From 2dfaeac3f38e4e550d215204eedd97a061fdc118 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:51 +0100 Subject: [PATCH 0661/2290] x86/efistub: Avoid legacy decompressor when doing EFI boot From: Ard Biesheuvel [ Commit a1b87d54f4e45ff5e0d081fb1d9db3bf1a8fb39a upstream ] The bare metal decompressor code was never really intended to run in a hosted environment such as the EFI boot services, and does a few things that are becoming problematic in the context of EFI boot now that the logo requirements are getting tighter: EFI executables will no longer be allowed to consist of a single executable section that is mapped with read, write and execute permissions if they are intended for use in a context where Secure Boot is enabled (and where Microsoft's set of certificates is used, i.e., every x86 PC built to run Windows). To avoid stepping on reserved memory before having inspected the E820 tables, and to ensure the correct placement when running a kernel build that is non-relocatable, the bare metal decompressor moves its own executable image to the end of the allocation that was reserved for it, in order to perform the decompression in place. This means the region in question requires both write and execute permissions, which either need to be given upfront (which EFI will no longer permit), or need to be applied on demand using the existing page fault handling framework. However, the physical placement of the kernel is usually randomized anyway, and even if it isn't, a dedicated decompression output buffer can be allocated anywhere in memory using EFI APIs when still running in the boot services, given that EFI support already implies a relocatable kernel. This means that decompression in place is never necessary, nor is moving the compressed image from one end to the other. Since EFI already maps all of memory 1:1, it is also unnecessary to create new page tables or handle page faults when decompressing the kernel. That means there is also no need to replace the special exception handlers for SEV. Generally, there is little need to do any of the things that the decompressor does beyond - initialize SEV encryption, if needed, - perform the 4/5 level paging switch, if needed, - decompress the kernel - relocate the kernel So do all of this from the EFI stub code, and avoid the bare metal decompressor altogether. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230807162720.545787-24-ardb@kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 5 + arch/x86/boot/compressed/efi_mixed.S | 55 -------- arch/x86/boot/compressed/head_32.S | 13 -- arch/x86/boot/compressed/head_64.S | 27 ---- arch/x86/include/asm/efi.h | 7 +- arch/x86/include/asm/sev.h | 2 + drivers/firmware/efi/libstub/x86-stub.c | 166 ++++++++++-------------- 7 files changed, 84 insertions(+), 191 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 0c9ebf74fac59..3965b2c9efee0 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) endif +ifeq ($(CONFIG_EFI_STUB),y) +# ensure that the static EFI stub library will be pulled in, even if it is +# never referenced explicitly from the startup code +LDFLAGS_vmlinux += -u efi_pe_entry +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index d6d1b76b594d9..8232c5b2a9bf5 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -275,10 +275,6 @@ SYM_FUNC_START_LOCAL(efi32_entry) jmp startup_32 SYM_FUNC_END(efi32_entry) -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - /* * efi_status_t efi32_pe_entry(efi_handle_t image_handle, * efi_system_table_32_t *sys_table) @@ -286,8 +282,6 @@ SYM_FUNC_END(efi32_entry) SYM_FUNC_START(efi32_pe_entry) pushl %ebp movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - pushl %ebx // save callee-save registers pushl %edi @@ -296,48 +290,8 @@ SYM_FUNC_START(efi32_pe_entry) movl $0x80000003, %eax // EFI_UNSUPPORTED jnz 2f - call 1f -1: pop %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal (loaded_image_proto - 1b)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebp // calculate image_offset - movl %ebp, (image_offset - 1b)(%ebx) // save image_offset xorl %esi, %esi jmp efi32_entry // pass %ecx, %edx, %esi // no other registers remain live @@ -356,15 +310,6 @@ SYM_FUNC_START_NOALIGN(efi64_stub_entry) SYM_FUNC_END(efi64_stub_entry) #endif - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) - .data .balign 8 SYM_DATA_START_LOCAL(efi32_boot_gdt) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3af4a383615b3..1cfe9802a42fe 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE leal startup_32@GOTOFF(%edx), %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32() will be at an - * offset to the start of the space allocated for the image. efi_pe_entry() will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl image_offset@GOTOFF(%edx), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 9a0d83b4d266d..0d7aef10b19ad 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -138,19 +138,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl rva(image_offset)(%ebp), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -327,20 +314,6 @@ SYM_CODE_START(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - movl image_offset(%rip), %eax - subq %rax, %rbp -#endif - movl BP_kernel_alignment(%rsi), %eax decl %eax addq %rax, %rbp diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 522ff2e443b37..e601264b1a243 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -88,6 +88,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -101,8 +103,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index e231638ba19a4..cf98fc28601fb 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -210,6 +211,7 @@ static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 8d3ce383bcbb9..61017921f9ca9 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,17 +15,14 @@ #include #include #include +#include #include #include "efistub.h" #include "x86-stub.h" -/* Maximum physical address for 64-bit kernel with 4-level paging */ -#define MAXMEM_X86_64_4LEVEL (1ull << 46) - const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -u32 image_offset __section(".data"); static efi_loaded_image_t *image = NULL; static efi_memory_attribute_protocol_t *memattr; @@ -276,33 +273,9 @@ void efi_adjust_memory_range_protection(unsigned long start, } } -extern const u8 startup_32[], startup_64[]; - -static void -setup_memory_protection(unsigned long image_base, unsigned long image_size) -{ -#ifdef CONFIG_64BIT - if (image_base != (unsigned long)startup_32) - efi_adjust_memory_range_protection(image_base, image_size); -#else - /* - * Clear protection flags on a whole range of possible - * addresses used for KASLR. We don't need to do that - * on x86_64, since KASLR/extraction is performed after - * dedicated identity page tables are built and we only - * need to remove possible protection on relocated image - * itself disregarding further relocations. - */ - efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); -#endif -} - static const efi_char16_t apple[] = L"Apple"; -static void setup_quirks(struct boot_params *boot_params, - unsigned long image_base, - unsigned long image_size) +static void setup_quirks(struct boot_params *boot_params) { efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) efi_table_attr(efi_system_table, fw_vendor); @@ -311,9 +284,6 @@ static void setup_quirks(struct boot_params *boot_params, if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) retrieve_apple_device_properties(boot_params); } - - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) - setup_memory_protection(image_base, image_size); } /* @@ -466,7 +436,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, } image_base = efi_table_attr(image, image_base); - image_offset = (void *)startup_32 - image_base; status = efi_allocate_pages(sizeof(struct boot_params), (unsigned long *)&boot_params, ULONG_MAX); @@ -761,6 +730,61 @@ static bool have_unsupported_snp_features(void) return false; } +static void efi_get_seed(void *seed, int size) +{ + efi_get_random_bytes(size, seed); + + /* + * This only updates seed[0] when running on 32-bit, but in that case, + * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit. + */ + *(unsigned long *)seed ^= kaslr_get_random_long("EFI"); +} + +static void error(char *str) +{ + efi_warn("Decompression failed: %s\n", str); +} + +static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) +{ + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long addr, alloc_size, entry; + efi_status_t status; + u32 seed[2] = {}; + + /* determine the required size of the allocation */ + alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size), + MIN_KERNEL_ALIGN); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + + efi_get_seed(seed, sizeof(seed)); + + virt_addr += (range * seed[1]) >> 32; + virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + } + + status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, + seed[0], EFI_LOADER_CODE, + EFI_X86_KERNEL_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + return status; + + entry = decompress_kernel((void *)addr, virt_addr, error); + if (entry == ULONG_MAX) { + efi_free(alloc_size, addr); + return EFI_LOAD_ERROR; + } + + *kernel_entry = addr + entry; + + efi_adjust_memory_range_protection(addr, kernel_total_size); + + return EFI_SUCCESS; +} + static void __noreturn enter_kernel(unsigned long kernel_addr, struct boot_params *boot_params) { @@ -780,10 +804,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, struct boot_params *boot_params) { efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; - unsigned long bzimage_addr = (unsigned long)startup_32; - unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; const struct linux_efi_initrd *initrd = NULL; + unsigned long kernel_entry; efi_status_t status; efi_system_table = sys_table_arg; @@ -812,60 +835,6 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } - /* - * If the kernel isn't already loaded at a suitable address, - * relocate it. - * - * It must be loaded above LOAD_PHYSICAL_ADDR. - * - * The maximum address for 64-bit is 1 << 46 for 4-level paging. This - * is defined as the macro MAXMEM, but unfortunately that is not a - * compile-time constant if 5-level paging is configured, so we instead - * define our own macro for use here. - * - * For 32-bit, the maximum address is complicated to figure out, for - * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what - * KASLR uses. - * - * Also relocate it if image_offset is zero, i.e. the kernel wasn't - * loaded by LoadImage, but rather by a bootloader that called the - * handover entry. The reason we must always relocate in this case is - * to handle the case of systemd-boot booting a unified kernel image, - * which is a PE executable that contains the bzImage and an initrd as - * COFF sections. The initrd section is placed after the bzImage - * without ensuring that there are at least init_size bytes available - * for the bzImage, and thus the compressed kernel's startup code may - * overwrite the initrd unless it is moved out of the way. - */ - - buffer_start = ALIGN(bzimage_addr - image_offset, - hdr->kernel_alignment); - buffer_end = buffer_start + hdr->init_size; - - if ((buffer_start < LOAD_PHYSICAL_ADDR) || - (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || - (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || - (image_offset == 0)) { - extern char _bss[]; - - status = efi_relocate_kernel(&bzimage_addr, - (unsigned long)_bss - bzimage_addr, - hdr->init_size, - hdr->pref_address, - hdr->kernel_alignment, - LOAD_PHYSICAL_ADDR); - if (status != EFI_SUCCESS) { - efi_err("efi_relocate_kernel() failed!\n"); - goto fail; - } - /* - * Now that we've copied the kernel elsewhere, we no longer - * have a set up block before startup_32(), so reset image_offset - * to zero in case it was set earlier. - */ - image_offset = 0; - } - #ifdef CONFIG_CMDLINE_BOOL status = efi_parse_options(CONFIG_CMDLINE); if (status != EFI_SUCCESS) { @@ -883,6 +852,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle, } } + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); + goto fail; + } + /* * At this point, an initrd may already have been loaded by the * bootloader and passed via bootparams. We permit an initrd loaded @@ -922,7 +897,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle, setup_efi_pci(boot_params); - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); + setup_quirks(boot_params); status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { @@ -930,12 +905,15 @@ void __noreturn efi_stub_entry(efi_handle_t handle, goto fail; } - efi_5level_switch(); + /* + * Call the SEV init code while still running with the firmware's + * GDT/IDT, so #VC exceptions will be handled by EFI. + */ + sev_enable(boot_params); - if (IS_ENABLED(CONFIG_X86_64)) - bzimage_addr += startup_64 - startup_32; + efi_5level_switch(); - enter_kernel(bzimage_addr, boot_params); + enter_kernel(kernel_entry, boot_params); fail: efi_err("efi_stub_entry() failed!\n"); -- GitLab From 1b54062576792b41f0acb8d562deea7c4c718c33 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:52 +0100 Subject: [PATCH 0662/2290] efi/x86: Avoid physical KASLR on older Dell systems From: Ard Biesheuvel [ Commit 50d7cdf7a9b1ab6f4f74a69c84e974d5dc0c1bf1 upstream ] River reports boot hangs with v6.6 and v6.7, and the bisect points to commit a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") which moves the memory allocation and kernel decompression from the legacy decompressor (which executes *after* ExitBootServices()) to the EFI stub, using boot services for allocating the memory. The memory allocation succeeds but the subsequent call to decompress_kernel() never returns, resulting in a failed boot and a hanging system. As it turns out, this issue only occurs when physical address randomization (KASLR) is enabled, and given that this is a feature we can live without (virtual KASLR is much more important), let's disable the physical part of KASLR when booting on AMI UEFI firmware claiming to implement revision v2.0 of the specification (which was released in 2006), as this is the version these systems advertise. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218173 Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 31 +++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 61017921f9ca9..47ebc85c0d22e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -273,17 +273,20 @@ void efi_adjust_memory_range_protection(unsigned long start, } } +static efi_char16_t *efistub_fw_vendor(void) +{ + unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); + + return (efi_char16_t *)vendor; +} + static const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor); - - if (!memcmp(fw_vendor, apple, sizeof(apple))) { - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) - retrieve_apple_device_properties(boot_params); - } + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); } /* @@ -759,11 +762,25 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + static const efi_char16_t ami[] = L"American Megatrends"; efi_get_seed(seed, sizeof(seed)); virt_addr += (range * seed[1]) >> 32; virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + + /* + * Older Dell systems with AMI UEFI firmware v2.0 may hang + * while decompressing the kernel if physical address + * randomization is enabled. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=218173 + */ + if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; + } } status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, -- GitLab From 86c909d2275b91fb34be07b081c7343a0c2351f2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:53 +0100 Subject: [PATCH 0663/2290] x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR From: Ard Biesheuvel [ Commit 2f77465b05b1270c832b5e2ee27037672ad2a10a upstream ] The EFI stub's kernel placement logic randomizes the physical placement of the kernel by taking all available memory into account, and picking a region at random, based on a random seed. When KASLR is disabled, this seed is set to 0x0, and this results in the lowest available region of memory to be selected for loading the kernel, even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is typically reserved for the GFP_DMA region, to accommodate masters that can only access the first 16 MiB of system memory. Even if such devices are rare these days, we may still end up with a warning in the kernel log, as reported by Tom: swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0 Fix this by tweaking the random allocation logic to accept a low bound on the placement, and set it to LOAD_PHYSICAL_ADDR. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Reported-by: Tom Englund Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404 Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- drivers/firmware/efi/libstub/efistub.h | 3 ++- drivers/firmware/efi/libstub/randomalloc.c | 12 +++++++----- drivers/firmware/efi/libstub/x86-stub.c | 1 + 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 16377b4521190..16f15e36f9a7d 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -181,7 +181,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed, - EFI_LOADER_CODE, EFI_ALLOC_LIMIT); + EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 4b4055877f3d3..6741f3d900c5a 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -906,7 +906,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type, unsigned long alloc_limit); + int memory_type, unsigned long alloc_min, + unsigned long alloc_max); efi_status_t efi_random_get_seed(void); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index ed6f6087a9eac..7ba05719a53ba 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -17,7 +17,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, unsigned long align_shift, - u64 alloc_limit) + u64 alloc_min, u64 alloc_max) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -30,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - alloc_limit); + alloc_max); if (region_end < size) return 0; - first_slot = round_up(md->phys_addr, align); + first_slot = round_up(max(md->phys_addr, alloc_min), align); last_slot = round_down(region_end - size + 1, align); if (first_slot > last_slot) @@ -56,7 +56,8 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long *addr, unsigned long random_seed, int memory_type, - unsigned long alloc_limit) + unsigned long alloc_min, + unsigned long alloc_max) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -78,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, + alloc_max); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 47ebc85c0d22e..c1dcc86fcc3d2 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -785,6 +785,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, seed[0], EFI_LOADER_CODE, + LOAD_PHYSICAL_ADDR, EFI_X86_KERNEL_ALLOC_LIMIT); if (status != EFI_SUCCESS) return status; -- GitLab From 8f05493706ff8296d26b449db295b1dbb1de31dd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:54 +0100 Subject: [PATCH 0664/2290] x86/boot: Rename conflicting 'boot_params' pointer to 'boot_params_ptr' From: Ard Biesheuvel [ Commit b9e909f78e7e4b826f318cfe7bedf3ce229920e6 upstream ] The x86 decompressor is built and linked as a separate executable, but it shares components with the kernel proper, which are either #include'd as C files, or linked into the decompresor as a static library (e.g, the EFI stub) Both the kernel itself and the decompressor define a global symbol 'boot_params' to refer to the boot_params struct, but in the former case, it refers to the struct directly, whereas in the decompressor, it refers to a global pointer variable referring to the struct boot_params passed by the bootloader or constructed from scratch. This ambiguity is unfortunate, and makes it impossible to assign this decompressor variable from the x86 EFI stub, given that declaring it as extern results in a clash. So rename the decompressor version (whose scope is limited) to boot_params_ptr. [ mingo: Renamed 'boot_params_p' to 'boot_params_ptr' for clarity ] Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/acpi.c | 14 ++++++------- arch/x86/boot/compressed/cmdline.c | 4 ++-- arch/x86/boot/compressed/ident_map_64.c | 7 ++++--- arch/x86/boot/compressed/kaslr.c | 26 ++++++++++++------------- arch/x86/boot/compressed/misc.c | 24 +++++++++++------------ arch/x86/boot/compressed/misc.h | 1 - arch/x86/boot/compressed/pgtable_64.c | 9 ++++----- arch/x86/boot/compressed/sev.c | 2 +- arch/x86/include/asm/boot.h | 2 ++ 9 files changed, 45 insertions(+), 44 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 9caf89063e775..55c98fdd67d2b 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to * ACPI_TABLE_GUID because it has more features. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_20_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; @@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void) enum efi_type et; int ret; - et = efi_get_type(boot_params); + et = efi_get_type(boot_params_ptr); if (et == EFI_TYPE_NONE) return 0; - systab_pa = efi_get_system_table(boot_params); + systab_pa = efi_get_system_table(boot_params_ptr); if (!systab_pa) error("EFI support advertised, but unable to locate system table."); - ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); + ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len); if (ret || !cfg_tbl_pa) error("EFI config table not found."); @@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void) { acpi_physical_address pa; - pa = boot_params->acpi_rsdp_addr; + pa = boot_params_ptr->acpi_rsdp_addr; if (!pa) pa = efi_get_rsdp_addr(); @@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void) rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); if (!rsdp) rsdp = (struct acpi_table_rsdp *)(long) - boot_params->acpi_rsdp_addr; + boot_params_ptr->acpi_rsdp_addr; if (!rsdp) return 0; diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index f1add5d85da9d..c1bb180973ea2 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr) #include "../cmdline.c" unsigned long get_cmd_line_ptr(void) { - unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr; - cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; + cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32; return cmd_line_ptr; } diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d34222816c9f5..b8c42339bc355 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode) * or does not touch all the pages covering them. */ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); - boot_params = rmode; - kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); + boot_params_ptr = rmode; + kernel_add_identity_map((unsigned long)boot_params_ptr, + (unsigned long)(boot_params_ptr + 1)); cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); @@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode) * Also map the setup_data entries passed via boot_params in case they * need to be accessed by uncompressed kernel via the identity mapping. */ - sd = (struct setup_data *)boot_params->hdr.setup_data; + sd = (struct setup_data *)boot_params_ptr->hdr.setup_data; while (sd) { unsigned long sd_addr = (unsigned long)sd; diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e476bcbd9b422..9794d9174795d 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void) unsigned long hash = 0; hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); + hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr)); return hash; } @@ -383,7 +383,7 @@ static void handle_mem_options(void) static void mem_avoid_init(unsigned long input, unsigned long input_size, unsigned long output) { - unsigned long init_size = boot_params->hdr.init_size; + unsigned long init_size = boot_params_ptr->hdr.init_size; u64 initrd_start, initrd_size; unsigned long cmd_line, cmd_line_size; @@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; /* Avoid initrd. */ - initrd_start = (u64)boot_params->ext_ramdisk_image << 32; - initrd_start |= boot_params->hdr.ramdisk_image; - initrd_size = (u64)boot_params->ext_ramdisk_size << 32; - initrd_size |= boot_params->hdr.ramdisk_size; + initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32; + initrd_start |= boot_params_ptr->hdr.ramdisk_image; + initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32; + initrd_size |= boot_params_ptr->hdr.ramdisk_size; mem_avoid[MEM_AVOID_INITRD].start = initrd_start; mem_avoid[MEM_AVOID_INITRD].size = initrd_size; /* No need to set mapping for initrd, it will be handled in VO. */ @@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, } /* Avoid boot parameters. */ - mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; - mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr); /* We don't need to set a mapping for setup_data. */ @@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, } /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; + ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; while (ptr) { struct mem_vector avoid; @@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region, static bool process_efi_entries(unsigned long minimum, unsigned long image_size) { - struct efi_info *e = &boot_params->efi_info; + struct efi_info *e = &boot_params_ptr->efi_info; bool efi_mirror_found = false; struct mem_vector region; efi_memory_desc_t *md; @@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum, struct boot_e820_entry *entry; /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < boot_params->e820_entries; i++) { - entry = &boot_params->e820_table[i]; + for (i = 0; i < boot_params_ptr->e820_entries; i++) { + entry = &boot_params_ptr->e820_table[i]; /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) continue; @@ -836,7 +836,7 @@ void choose_random_location(unsigned long input, return; } - boot_params->hdr.loadflags |= KASLR_FLAG; + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; if (IS_ENABLED(CONFIG_X86_32)) mem_limit = KERNEL_IMAGE_SIZE; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index fb55ac18af6fa..8ae7893d712ff 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n); /* * This is set up by the setup-routine at boot-time */ -struct boot_params *boot_params; +struct boot_params *boot_params_ptr; struct port_io_ops pio_ops; @@ -132,8 +132,8 @@ void __putstr(const char *s) if (lines == 0 || cols == 0) return; - x = boot_params->screen_info.orig_x; - y = boot_params->screen_info.orig_y; + x = boot_params_ptr->screen_info.orig_x; + y = boot_params_ptr->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -154,8 +154,8 @@ void __putstr(const char *s) } } - boot_params->screen_info.orig_x = x; - boot_params->screen_info.orig_y = y; + boot_params_ptr->screen_info.orig_x = x; + boot_params_ptr->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -382,14 +382,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) size_t entry_offset; /* Retain x86 boot parameters pointer passed from startup_32/64. */ - boot_params = rmode; + boot_params_ptr = rmode; /* Clear flags intended for solely in-kernel use. */ - boot_params->hdr.loadflags &= ~KASLR_FLAG; + boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(boot_params); + sanitize_boot_params(boot_params_ptr); - if (boot_params->screen_info.orig_video_mode == 7) { + if (boot_params_ptr->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -397,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) vidport = 0x3d4; } - lines = boot_params->screen_info.orig_video_lines; - cols = boot_params->screen_info.orig_video_cols; + lines = boot_params_ptr->screen_info.orig_video_lines; + cols = boot_params_ptr->screen_info.orig_video_cols; init_default_io_ops(); @@ -417,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) * so that early debugging output from the RSDP parsing code can be * collected. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr(); debug_putstr("early console in extract_kernel\n"); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b6e46435b90b8..254acd76efde2 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,7 +52,6 @@ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; void *malloc(int size); void free(void *where); -extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); #define error_putstr(__x) __putstr(__x) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 7939eb6e6ce9b..51f957b24ba7a 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -28,7 +28,6 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; */ unsigned long *trampoline_32bit __section(".data"); -extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); static unsigned long find_trampoline_placement(void) @@ -49,7 +48,7 @@ static unsigned long find_trampoline_placement(void) * * Only look for values in the legacy ROM for non-EFI system. */ - signature = (char *)&boot_params->efi_info.efi_loader_signature; + signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature; if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { ebda_start = *(unsigned short *)0x40e << 4; @@ -65,10 +64,10 @@ static unsigned long find_trampoline_placement(void) bios_start = round_down(bios_start, PAGE_SIZE); /* Find the first usable memory region under bios_start. */ - for (i = boot_params->e820_entries - 1; i >= 0; i--) { + for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) { unsigned long new = bios_start; - entry = &boot_params->e820_table[i]; + entry = &boot_params_ptr->e820_table[i]; /* Skip all entries above bios_start. */ if (bios_start <= entry->addr) @@ -107,7 +106,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = bp; + boot_params_ptr = bp; /* * Check if LA57 is desired and supported. diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 8b21c57bc4700..d07e665bb265b 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -565,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) * accessed after switchover. */ if (sev_snp_enabled()) { - unsigned long cc_info_pa = boot_params->cc_blob_address; + unsigned long cc_info_pa = boot_params_ptr->cc_blob_address; struct cc_blob_sev_info *cc_info; kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index b3a7cfb0d99e0..a38cc0afc90a0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -85,6 +85,8 @@ extern const unsigned long kernel_total_size; unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, void (*error)(char *x)); + +extern struct boot_params *boot_params_ptr; #endif #endif /* _ASM_X86_BOOT_H */ -- GitLab From 3a396c409a39ce701533f3f55f3db0ab700aaeae Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:55 +0100 Subject: [PATCH 0665/2290] x86/boot: efistub: Assign global boot_params variable From: Ard Biesheuvel [ Commit 50dcc2e0d62e3c4a54f39673c4dc3dcde7c74d52 upstream ] Now that the x86 EFI stub calls into some APIs exposed by the decompressor (e.g., kaslr_get_random_long()), it is necessary to ensure that the global boot_params variable is set correctly before doing so. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index c1dcc86fcc3d2..b183b40195eee 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -827,6 +827,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, unsigned long kernel_entry; efi_status_t status; + boot_params_ptr = boot_params; + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) -- GitLab From 2402392bed4e440e05442fb1de4ef97536ff5a96 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Mar 2024 12:19:56 +0100 Subject: [PATCH 0666/2290] efi/x86: Fix the missing KASLR_FLAG bit in boot_params->hdr.loadflags From: Yuntao Wang [ Commit 01638431c465741e071ab34acf3bef3c2570f878 upstream ] When KASLR is enabled, the KASLR_FLAG bit in boot_params->hdr.loadflags should be set to 1 to propagate KASLR status from compressed kernel to kernel, just as the choose_random_location() function does. Currently, when the kernel is booted via the EFI stub, the KASLR_FLAG bit in boot_params->hdr.loadflags is not set, even though it should be. This causes some functions, such as kernel_randomize_memory(), not to execute as expected. Fix it. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Signed-off-by: Yuntao Wang [ardb: drop 'else' branch clearing KASLR_FLAG] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index b183b40195eee..a0757a37b482b 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -781,6 +781,8 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); seed[0] = 0; } + + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; } status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, -- GitLab From c4c795b21dd23d9514ae1c6646c3fb2c78b5be60 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 Feb 2024 09:46:57 -0800 Subject: [PATCH 0667/2290] af_unix: Drop oob_skb ref before purging queue in GC. commit aa82ac51d63328714645c827775d64dbfd9941f3 upstream. syzbot reported another task hung in __unix_gc(). [0] The current while loop assumes that all of the left candidates have oob_skb and calling kfree_skb(oob_skb) releases the remaining candidates. However, I missed a case that oob_skb has self-referencing fd and another fd and the latter sk is placed before the former in the candidate list. Then, the while loop never proceeds, resulting the task hung. __unix_gc() has the same loop just before purging the collected skb, so we can call kfree_skb(oob_skb) there and let __skb_queue_purge() release all inflight sockets. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 2784 Comm: kworker/u4:8 Not tainted 6.8.0-rc4-syzkaller-01028-g71b605d32017 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: events_unbound __unix_gc RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x70 kernel/kcov.c:200 Code: 89 fb e8 23 00 00 00 48 8b 3d 84 f5 1a 0c 48 89 de 5b e9 43 26 57 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1e fa 48 8b 04 24 65 48 8b 0d 90 52 70 7e 65 8b 15 91 52 70 RSP: 0018:ffffc9000a17fa78 EFLAGS: 00000287 RAX: ffffffff8a0a6108 RBX: ffff88802b6c2640 RCX: ffff88802c0b3b80 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffffc9000a17fbf0 R08: ffffffff89383f1d R09: 1ffff1100ee5ff84 R10: dffffc0000000000 R11: ffffed100ee5ff85 R12: 1ffff110056d84ee R13: ffffc9000a17fae0 R14: 0000000000000000 R15: ffffffff8f47b840 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffef5687ff8 CR3: 0000000029b34000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __unix_gc+0xe69/0xf40 net/unix/garbage.c:343 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Reported-and-tested-by: syzbot+ecab4d36f920c3574bf9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ecab4d36f920c3574bf9 Fixes: 25236c91b5ab ("af_unix: Fix task hung while purging oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/garbage.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9e1bab97c05ba..ab2c83d58b62a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -314,18 +322,6 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - while (!list_empty(&gc_candidates)) { - u = list_entry(gc_candidates.next, struct unix_sock, link); - if (u->oob_skb) { - struct sk_buff *skb = u->oob_skb; - - u->oob_skb = NULL; - kfree_skb(skb); - } - } -#endif - spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to -- GitLab From 2c96f66cd0cca5695ec326398f98b58f545ac087 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:33:43 +0100 Subject: [PATCH 0668/2290] phy: freescale: phy-fsl-imx8-mipi-dphy: Fix alias name to use dashes [ Upstream commit 7936378cb6d87073163130e1e1fc1e5f76a597cf ] Devicetree spec lists only dashes as valid characters for alias names. Table 3.2: Valid characters for alias names, Devicee Specification, Release v0.4 Signed-off-by: Alexander Stein Fixes: 3fbae284887de ("phy: freescale: phy-fsl-imx8-mipi-dphy: Add i.MX8qxp LVDS PHY mode support") Link: https://lore.kernel.org/r/20240110093343.468810-1-alexander.stein@ew.tq-group.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b32889bfc..0928a526e2ab3 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); -- GitLab From 7eb95e0af5c9c2e6fad50356eaf32d216d0e7bc3 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Thu, 25 Jan 2024 14:30:17 -0600 Subject: [PATCH 0669/2290] powerpc/pseries/iommu: IOMMU table is not initialized for kdump over SR-IOV [ Upstream commit 09a3c1e46142199adcee372a420b024b4fc61051 ] When kdump kernel tries to copy dump data over SR-IOV, LPAR panics due to NULL pointer exception: Kernel attempted to read user page (0) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc000000020847ad4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: mlx5_core(+) vmx_crypto pseries_wdt papr_scm libnvdimm mlxfw tls psample sunrpc fuse overlay squashfs loop CPU: 12 PID: 315 Comm: systemd-udevd Not tainted 6.4.0-Test102+ #12 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c000000020847ad4 LR: c00000002083b2dc CTR: 00000000006cd18c REGS: c000000029162ca0 TRAP: 0300 Not tainted (6.4.0-Test102+) MSR: 800000000280b033 CR: 48288244 XER: 00000008 CFAR: c00000002083b2d8 DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 1 ... NIP _find_next_zero_bit+0x24/0x110 LR bitmap_find_next_zero_area_off+0x5c/0xe0 Call Trace: dev_printk_emit+0x38/0x48 (unreliable) iommu_area_alloc+0xc4/0x180 iommu_range_alloc+0x1e8/0x580 iommu_alloc+0x60/0x130 iommu_alloc_coherent+0x158/0x2b0 dma_iommu_alloc_coherent+0x3c/0x50 dma_alloc_attrs+0x170/0x1f0 mlx5_cmd_init+0xc0/0x760 [mlx5_core] mlx5_function_setup+0xf0/0x510 [mlx5_core] mlx5_init_one+0x84/0x210 [mlx5_core] probe_one+0x118/0x2c0 [mlx5_core] local_pci_probe+0x68/0x110 pci_call_probe+0x68/0x200 pci_device_probe+0xbc/0x1a0 really_probe+0x104/0x540 __driver_probe_device+0xb4/0x230 driver_probe_device+0x54/0x130 __driver_attach+0x158/0x2b0 bus_for_each_dev+0xa8/0x130 driver_attach+0x34/0x50 bus_add_driver+0x16c/0x300 driver_register+0xa4/0x1b0 __pci_register_driver+0x68/0x80 mlx5_init+0xb8/0x100 [mlx5_core] do_one_initcall+0x60/0x300 do_init_module+0x7c/0x2b0 At the time of LPAR dump, before kexec hands over control to kdump kernel, DDWs (Dynamic DMA Windows) are scanned and added to the FDT. For the SR-IOV case, default DMA window "ibm,dma-window" is removed from the FDT and DDW added, for the device. Now, kexec hands over control to the kdump kernel. When the kdump kernel initializes, PCI busses are scanned and IOMMU group/tables created, in pci_dma_bus_setup_pSeriesLP(). For the SR-IOV case, there is no "ibm,dma-window". The original commit: b1fc44eaa9ba, fixes the path where memory is pre-mapped (direct mapped) to the DDW. When TCEs are direct mapped, there is no need to initialize IOMMU tables. iommu_table_setparms_lpar() only considers "ibm,dma-window" property when initiallizing IOMMU table. In the scenario where TCEs are dynamically allocated for SR-IOV, newly created IOMMU table is not initialized. Later, when the device driver tries to enter TCEs for the SR-IOV device, NULL pointer execption is thrown from iommu_area_alloc(). The fix is to initialize the IOMMU table with DDW property stored in the FDT. There are 2 points to remember: 1. For the dedicated adapter, kdump kernel would encounter both default and DDW in FDT. In this case, DDW property is used to initialize the IOMMU table. 2. A DDW could be direct or dynamic mapped. kdump kernel would initialize IOMMU table and mark the existing DDW as "dynamic". This works fine since, at the time of table initialization, iommu_table_clear() makes some space in the DDW, for some predefined number of TCEs which are needed for kdump to succeed. Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window") Signed-off-by: Gaurav Batra Reviewed-by: Brian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20240125203017.61014-1-gbatra@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/iommu.c | 156 +++++++++++++++++-------- 1 file changed, 105 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 97b026130c71b..1e5f083cdb720 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table_ops iommu_table_lpar_multi_ops; -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - struct iommu_table_group *table_group, - const __be32 *dma_window) -{ - unsigned long offset, size, liobn; - - of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); - - iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, - &iommu_table_lpar_multi_ops); - - - table_group->tce32_start = offset; - table_group->tce32_size = size; -} - struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, .clear = tce_free_pSeries, @@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { * dynamic 64bit DMA window, walking up the device tree. */ static struct device_node *pci_dma_find(struct device_node *dn, - const __be32 **dma_window) + struct dynamic_dma_window_prop *prop) { - const __be32 *dw = NULL; + const __be32 *default_prop = NULL; + const __be32 *ddw_prop = NULL; + struct device_node *rdn = NULL; + bool default_win = false, ddw_win = false; for ( ; dn && PCI_DN(dn); dn = dn->parent) { - dw = of_get_property(dn, "ibm,dma-window", NULL); - if (dw) { - if (dma_window) - *dma_window = dw; - return dn; + default_prop = of_get_property(dn, "ibm,dma-window", NULL); + if (default_prop) { + rdn = dn; + default_win = true; + } + ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; } - dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); - if (dw) - return dn; - dw = of_get_property(dn, DMA64_PROPNAME, NULL); - if (dw) - return dn; + + /* At least found default window, which is the case for normal boot */ + if (default_win) + break; } - return NULL; + /* For PCI devices there will always be a DMA window, either on the device + * or parent bus + */ + WARN_ON(!(default_win | ddw_win)); + + /* caller doesn't want to get DMA window property */ + if (!prop) + return rdn; + + /* parse DMA window property. During normal system boot, only default + * DMA window is passed in OF. But, for kdump, a dedicated adapter might + * have both default and DDW in FDT. In this scenario, DDW takes precedence + * over default window. + */ + if (ddw_win) { + struct dynamic_dma_window_prop *p; + + p = (struct dynamic_dma_window_prop *)ddw_prop; + prop->liobn = p->liobn; + prop->dma_base = p->dma_base; + prop->tce_shift = p->tce_shift; + prop->window_shift = p->window_shift; + } else if (default_win) { + unsigned long offset, size, liobn; + + of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); + + prop->liobn = cpu_to_be32((u32)liobn); + prop->dma_base = cpu_to_be64(offset); + prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); + prop->window_shift = cpu_to_be32(order_base_2(size)); + } + + return rdn; } static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) @@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const __be32 *dma_window = NULL; + struct dynamic_dma_window_prop prop; dn = pci_bus_to_OF_node(bus); pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); - if (dma_window == NULL) - pr_debug(" no ibm,dma-window property !\n"); + /* In PPC architecture, there will always be DMA window on bus or one of the + * parent bus. During reboot, there will be ibm,dma-window property to + * define DMA window. For kdump, there will at least be default window or DDW + * or both. + */ ppci = PCI_DN(pdn); @@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - if (dma_window) { - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); - } + iommu_table_setparms_common(tbl, ppci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name) continue; } + /* If at the time of system initialization, there are DDWs in OF, + * it means this is during kexec. DDW could be direct or dynamic. + * We will just mark DDWs as "dynamic" since this is kdump path, + * no need to worry about perforance. ddw_list_new_entry() will + * set window->direct = false. + */ window = ddw_list_new_entry(pdn, dma64); if (!window) { of_node_put(pdn); @@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const __be32 *dma_window = NULL; struct pci_dn *pci; + struct dynamic_dma_window_prop prop; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, - pci->table_group, dma_window); + + iommu_table_setparms_common(tbl, pci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, -- GitLab From a3d369aeb332bc7a29ba1facb9a3d3d8ba8d2568 Mon Sep 17 00:00:00 2001 From: Arturas Moskvinas Date: Fri, 1 Mar 2024 09:12:04 +0200 Subject: [PATCH 0670/2290] gpio: 74x164: Enable output pins after registers are reset [ Upstream commit 530b1dbd97846b110ea8a94c7cc903eca21786e5 ] Chip outputs are enabled[1] before actual reset is performed[2] which might cause pin output value to flip flop if previous pin value was set to 1. Fix that behavior by making sure chip is fully reset before all outputs are enabled. Flip-flop can be noticed when module is removed and inserted again and one of the pins was changed to 1 before removal. 100 microsecond flipping is noticeable on oscilloscope (100khz SPI bus). For a properly reset chip - output is enabled around 100 microseconds (on 100khz SPI bus) later during probing process hence should be irrelevant behavioral change. Fixes: 7ebc194d0fd4 (gpio: 74x164: Introduce 'enable-gpios' property) Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L130 [1] Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L150 [2] Signed-off-by: Arturas Moskvinas Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-74x164.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c333105170..753e7be039e4d 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; -- GitLab From 17acece41de3dafb63018fecbf54d288366901eb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Feb 2024 21:28:46 +0200 Subject: [PATCH 0671/2290] gpiolib: Fix the error path order in gpiochip_add_data_with_key() [ Upstream commit e4aec4daa8c009057b5e063db1b7322252c92dc8 ] After shuffling the code, error path wasn't updated correctly. Fix it here. Fixes: 2f4133bb5f14 ("gpiolib: No need to call gpiochip_remove_pin_ranges() twice") Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6d3e3454a6ed6..f646df7f1b41a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -815,11 +815,11 @@ err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); err_remove_acpi_chip: acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); err_remove_of_chip: gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ -- GitLab From c6ff5fb6b157cf4101889c1f3e169eb6897e8f50 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 29 Feb 2024 18:25:49 +0100 Subject: [PATCH 0672/2290] gpio: fix resource unwinding order in error path [ Upstream commit ec5c54a9d3c4f9c15e647b049fea401ee5258696 ] Hogs are added *after* ACPI so should be removed *before* in error path. Fixes: a411e81e61df ("gpiolib: add hogs support for machine code") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f646df7f1b41a..9d8c783124033 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -813,11 +813,11 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: +err_free_hogs: + gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); err_remove_of_chip: - gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: gpiochip_free_valid_mask(gc); -- GitLab From 0e351d1aa2e4c1a7a4cb2a5753b86db89796d3c8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 25 Feb 2024 11:01:41 +0800 Subject: [PATCH 0673/2290] block: define bvec_iter as __packed __aligned(4) [ Upstream commit 7838b4656110d950afdd92a081cc0f33e23e0ea8 ] In commit 19416123ab3e ("block: define 'struct bvec_iter' as packed"), what we need is to save the 4byte padding, and avoid `bio` to spread on one extra cache line. It is enough to define it as '__packed __aligned(4)', as '__packed' alone means byte aligned, and can cause compiler to generate horrible code on architectures that don't support unaligned access in case that bvec_iter is embedded in other structures. Cc: Mikulas Patocka Suggested-by: Linus Torvalds Fixes: 19416123ab3e ("block: define 'struct bvec_iter' as packed") Signed-off-by: Ming Lei Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/bvec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 9e3dac51eb26b..d4dbaae8b5218 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -59,7 +59,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; -- GitLab From 19ec82b3cad1abef2a929262b8c1528f4e0c192d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 14:10:12 +0100 Subject: [PATCH 0674/2290] Revert "interconnect: Fix locking for runpm vs reclaim" This reverts commit ee42bfc791aa3cd78e29046f26a09d189beb3efb which is commit af42269c3523492d71ebbe11fefae2653e9cdc78 upstream. It is reported to cause boot crashes in Android systems, so revert it from the stable trees for now. Cc: Rob Clark Cc: Georgi Djakov Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1d9494f64a215..87f380e0e982e 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers); static int providers_count; static bool synced_state; static DEFINE_MUTEX(icc_lock); -static DEFINE_MUTEX(icc_bw_lock); static struct dentry *icc_debugfs_dir; static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) @@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) if (WARN_ON(IS_ERR(path) || !path->num_nodes)) return -EINVAL; - mutex_lock(&icc_bw_lock); + mutex_lock(&icc_lock); old_avg = path->reqs[0].avg_bw; old_peak = path->reqs[0].peak_bw; @@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) apply_constraints(path); } - mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); trace_icc_set_bw_end(path, ret); @@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); node->provider = provider; list_add_tail(&node->node_list, &provider->nodes); @@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = 0; node->peak_bw = 0; - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_node_add); @@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); synced_state = true; list_for_each_entry(p, &icc_providers, provider_list) { dev_dbg(p->dev, "interconnect provider is in synced state\n"); -- GitLab From 559035e04e442a0c7fd58d5fe00308b0d99e2318 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 14:12:15 +0100 Subject: [PATCH 0675/2290] Revert "interconnect: Teach lockdep about icc_bw_lock order" This reverts commit 0db211ec0f1d32b93486e8f6565249ad4d1bece5 which is commit 13619170303878e1dae86d9a58b039475c957fcf upstream. It is reported to cause boot crashes in Android systems, so revert it from the stable trees for now. Cc: Rob Clark Cc: Georgi Djakov Cc: Guenter Roeck Cc: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 87f380e0e982e..4526ff2e1bd5f 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1146,21 +1146,13 @@ void icc_sync_state(struct device *dev) } } } - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_sync_state); static int __init icc_init(void) { - struct device_node *root; - - /* Teach lockdep about lock ordering wrt. shrinker: */ - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&icc_bw_lock); - fs_reclaim_release(GFP_KERNEL); - - root = of_find_node_by_path("/"); + struct device_node *root = of_find_node_by_path("/"); providers_count = of_count_icc_providers(root); of_node_put(root); -- GitLab From 29d3e02fb448b50ffd5d83156de9680daf16f47a Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:23:53 -0800 Subject: [PATCH 0676/2290] x86/bugs: Add asm helpers for executing VERW commit baf8361e54550a48a7087b603313ad013cc13386 upstream. MDS mitigation requires clearing the CPU buffers before returning to user. This needs to be done late in the exit-to-user path. Current location of VERW leaves a possibility of kernel data ending up in CPU buffers for memory accesses done after VERW such as: 1. Kernel data accessed by an NMI between VERW and return-to-user can remain in CPU buffers since NMI returning to kernel does not execute VERW to clear CPU buffers. 2. Alyssa reported that after VERW is executed, CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system call. Memory accesses during stack scrubbing can move kernel stack contents into CPU buffers. 3. When caller saved registers are restored after a return from function executing VERW, the kernel stack accesses can remain in CPU buffers(since they occur after VERW). To fix this VERW needs to be moved very late in exit-to-user path. In preparation for moving VERW to entry/exit asm code, create macros that can be used in asm. Also make VERW patching depend on a new feature flag X86_FEATURE_CLEAR_CPU_BUF. [pawan: - Runtime patch jmp instead of verw in macro CLEAR_CPU_BUFFERS due to lack of relative addressing support for relocations in kernels < v6.5. - Add UNWIND_HINT_EMPTY to avoid warning: arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction] Reported-by: Alyssa Milburn Suggested-by: Andrew Cooper Suggested-by: Peter Zijlstra Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry.S | 23 +++++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb362bcf..09e99d13fc0b3 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b122708792c4d..b60f24b30cb90 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -304,7 +304,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ - +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d3706de91a934..2c66b2081f877 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,19 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_\@: +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -375,6 +388,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** -- GitLab From 22444d079b4ccc608b9bac3e591cd88629c73df7 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:23:59 -0800 Subject: [PATCH 0677/2290] x86/entry_64: Add VERW just before userspace transition commit 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a upstream. Mitigation for MDS is to use VERW instruction to clear any secrets in CPU Buffers. Any memory accesses after VERW execution can still remain in CPU buffers. It is safer to execute VERW late in return to user path to minimize the window in which kernel data can end up in CPU buffers. There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. Add support for deploying VERW mitigation after user register state is restored. This helps minimize the chances of kernel data ending up into CPU buffers after executing VERW. Note that the mitigation at the new location is not yet enabled. Corner case not handled ======================= Interrupts returning to kernel don't clear CPUs buffers since the exit-to-user path is expected to do that anyways. But, there could be a case when an NMI is generated in kernel after the exit-to-user path has cleared the buffers. This case is not handled and NMI returning to kernel don't clear CPU buffers because: 1. It is rare to get an NMI after VERW, but before returning to user. 2. For an unprivileged user, there is no known way to make that NMI less rare or target it. 3. It would take a large number of these precisely-timed NMIs to mount an actual attack. There's presumably not enough bandwidth. 4. The NMI in question occurs after a VERW, i.e. when user state is restored and most interesting data is already scrubbed. Whats left is only the data that NMI touches, and that may or may not be of any interest. [ pawan: resolved conflict for hunk swapgs_restore_regs_and_return_to_usermode in backport ] Suggested-by: Dave Hansen Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/entry/entry_64_compat.S | 1 + 2 files changed, 12 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9953d966d1244..c2383c2880ec6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -223,6 +223,7 @@ syscall_return_via_sysret: SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR swapgs + CLEAR_CPU_BUFFERS sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi swapgs + CLEAR_CPU_BUFFERS jmp .Lnative_iret @@ -767,6 +769,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1493,6 +1497,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY ENDBR mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d6c08d8986b17..4bcd009a232bf 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR -- GitLab From 2e3087505ddb8ba2d3d4c81306cca11e868fcdb9 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:05 -0800 Subject: [PATCH 0678/2290] x86/entry_32: Add VERW just before userspace transition commit a0e2dab44d22b913b4c228c8b52b2a104434b0b3 upstream. As done for entry_64, add support for executing VERW late in exit to user path for 32-bit mode. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e71560389..ee5def1060c86 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -981,6 +982,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: -- GitLab From 07946d956b55703102d5eb1518888f0d0ac87e14 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:11 -0800 Subject: [PATCH 0679/2290] x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key commit 6613d82e617dd7eb8b0c40b2fe3acea655b1d611 upstream. The VERW mitigation at exit-to-user is enabled via a static branch mds_user_clear. This static branch is never toggled after boot, and can be safely replaced with an ALTERNATIVE() which is convenient to use in asm. Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user path. Also remove the now redundant VERW in exc_nmi() and arch_exit_to_user_mode(). Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 38 ++++++++++++++++++++-------- arch/x86/include/asm/entry-common.h | 1 - arch/x86/include/asm/nospec-branch.h | 12 --------- arch/x86/kernel/cpu/bugs.c | 15 +++++------ arch/x86/kernel/nmi.c | 3 --- arch/x86/kvm/vmx/vmx.c | 2 +- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be200f9..e801df0bb3a81 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 11203a9fe0a87..ffe72790ceafd 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2c66b2081f877..8f6f17a8617b6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -381,7 +381,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); @@ -415,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } -/** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 13dffc43ded02..d1895930e6eb8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -423,7 +420,7 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else static_branch_enable(&mmio_stale_data_clear); @@ -483,12 +480,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index cec0bfa3bc04f..ed6cce6c39504 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -522,9 +522,6 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 57c1374fdfd49..3b76f1bf001e5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7123,7 +7123,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) + else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) -- GitLab From edfaad334a11d4fba21cbd860ba9a61213f4bd0b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:18 -0800 Subject: [PATCH 0680/2290] KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH From: Sean Christopherson commit 706a189dcf74d3b3f955e9384785e726ed6c7c80 upstream. Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus VMLAUNCH. Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF, for MDS mitigations as late as possible without needing to duplicate VERW for both paths. [ pawan: resolved merge conflict in __vmx_vcpu_run in backport. ] Signed-off-by: Sean Christopherson Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/all/20240213-delay-verw-v8-5-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/run_flags.h | 7 +++++-- arch/x86/kvm/vmx/vmenter.S | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc1896..6a9bfdfbb6e59 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b5db4de4d09e..42c0b2c3aee10 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb $VMX_RUN_VMRESUME, %bl + bt $VMX_RUN_VMRESUME_SHIFT, %bx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -128,8 +128,8 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'testb' above */ - jz .Lvmlaunch + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" -- GitLab From da67116b74e6aa9c531de386e1d99f2e460d1cc4 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Mar 2024 01:24:24 -0800 Subject: [PATCH 0681/2290] KVM/VMX: Move VERW closer to VMentry for MDS mitigation commit 43fb862de8f628c5db5e96831c915b9aebf62d33 upstream. During VMentry VERW is executed to mitigate MDS. After VERW, any memory access like register push onto stack may put host data in MDS affected CPU buffers. A guest can then use MDS to sample host data. Although likelihood of secrets surviving in registers at current VERW callsite is less, but it can't be ruled out. Harden the MDS mitigation by moving the VERW mitigation late in VMentry path. Note that VERW for MMIO Stale Data mitigation is unchanged because of the complexity of per-guest conditional VERW which is not easy to handle that late in asm with no GPRs available. If the CPU is also affected by MDS, VERW is unconditionally executed late in asm regardless of guest having MMIO access. [ pawan: conflict resolved in backport ] Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Sean Christopherson Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmenter.S | 3 +++ arch/x86/kvm/vmx/vmx.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 42c0b2c3aee10..0b2cad66dee12 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -128,6 +128,9 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ jnc .Lvmlaunch diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3b76f1bf001e5..5c1590855ffcd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, { guest_state_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); -- GitLab From 5fafd8254add75d8337df44ba8536e407ffe8928 Mon Sep 17 00:00:00 2001 From: Louis DeLosSantos Date: Wed, 31 May 2023 15:38:48 -0400 Subject: [PATCH 0682/2290] bpf: Add table ID to bpf_fib_lookup BPF helper commit 8ad77e72caae22a1ddcfd0c03f2884929e93b7a4 upstream. Add ability to specify routing table ID to the `bpf_fib_lookup` BPF helper. A new field `tbid` is added to `struct bpf_fib_lookup` used as parameters to the `bpf_fib_lookup` BPF helper. When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and `BPF_FIB_LOOKUP_TBID` flags the `tbid` field in `struct bpf_fib_lookup` will be used as the table ID for the fib lookup. If the `tbid` does not exist the fib lookup will fail with `BPF_FIB_LKUP_RET_NOT_FWDED`. The `tbid` field becomes a union over the vlan related output fields in `struct bpf_fib_lookup` and will be zeroed immediately after usage. This functionality is useful in containerized environments. For instance, if a CNI wants to dictate the next-hop for traffic leaving a container it can create a container-specific routing table and perform a fib lookup against this table in a "host-net-namespace-side" TC program. This functionality also allows `ip rule` like functionality at the TC layer, allowing an eBPF program to pick a routing table based on some aspect of the sk_buff. As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN datapath. When egress traffic leaves a Pod an eBPF program attached by Cilium will determine which VRF the egress traffic should target, and then perform a FIB lookup in a specific table representing this VRF's FIB. Signed-off-by: Louis DeLosSantos Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230505-bpf-add-tbid-fib-lookup-v2-1-0a31c22c748c@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- net/core/filter.c | 14 +++++++++++++- tools/include/uapi/linux/bpf.h | 21 ++++++++++++++++++--- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 201dc77ebbd77..02cf4d9d8eab5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6687,6 +6691,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6747,9 +6752,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/net/core/filter.c b/net/core/filter.c index 3a6110ea4009f..085d211085658 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5885,6 +5891,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib6_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5957,7 +5969,7 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 201dc77ebbd77..02cf4d9d8eab5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6687,6 +6691,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6747,9 +6752,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; -- GitLab From 2d7ebcb5d878b4311db56eeaf7bdd76dbe9b9a13 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Sat, 7 Oct 2023 10:14:14 +0200 Subject: [PATCH 0683/2290] bpf: Derive source IP addr via bpf_*_fib_lookup() commit dab4e1f06cabb6834de14264394ccab197007302 upstream. Extend the bpf_fib_lookup() helper by making it to return the source IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set. For example, the following snippet can be used to derive the desired source IP address: struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr }; ret = bpf_skb_fib_lookup(skb, p, sizeof(p), BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH); if (ret != BPF_FIB_LKUP_RET_SUCCESS) return TC_ACT_SHOT; /* the p.ipv4_src now contains the source address */ The inability to derive the proper source address may cause malfunctions in BPF-based dataplanes for hosts containing netdevs with more than one routable IP address or for multi-homed hosts. For example, Cilium implements packet masquerading in BPF. If an egressing netdev to which the Cilium's BPF prog is attached has multiple IP addresses, then only one [hardcoded] IP address can be used for masquerading. This breaks connectivity if any other IP address should have been selected instead, for example, when a public and private addresses are attached to the same egress interface. The change was tested with Cilium [1]. Nikolay Aleksandrov helped to figure out the IPv6 addr selection. [1]: https://github.com/cilium/cilium/pull/28283 Signed-off-by: Martynas Pumputis Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6_stubs.h | 5 +++++ include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 18 +++++++++++++++++- net/ipv6/af_inet6.c | 1 + tools/include/uapi/linux/bpf.h | 10 ++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf47372..21da31e1dff5d 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 02cf4d9d8eab5..d5d2183730b9f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3121,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6692,6 +6697,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6704,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6738,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/net/core/filter.c b/net/core/filter.c index 085d211085658..cb7c4651eaec8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5809,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5951,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -5969,7 +5984,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0b42eb8c55aaf..62247621cea52 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 02cf4d9d8eab5..d5d2183730b9f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3121,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6692,6 +6697,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6704,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6738,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ -- GitLab From 8866334e35102d054160a86750b7db9203f721f9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Jan 2024 12:14:30 +0100 Subject: [PATCH 0684/2290] x86/efistub: Give up if memory attribute protocol returns an error commit a7a6a01f88e87dec4bf2365571dd2dc7403d52d0 upstream. The recently introduced EFI memory attributes protocol should be used if it exists to ensure that the memory allocation created for the kernel permits execution. This is needed for compatibility with tightened requirements related to Windows logo certification for x86 PCs. Currently, we simply strip the execute protect (XP) attribute from the entire range, but this might be rejected under some firmware security policies, and so in a subsequent patch, this will be changed to only strip XP from the executable region that runs early, and make it read-only (RO) as well. In order to catch any issues early, ensure that the memory attribute protocol works as intended, and give up if it produces spurious errors. Note that the DXE services based fallback was always based on best effort, so don't propagate any errors returned by that API. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 24 ++++++++++++++---------- drivers/firmware/efi/libstub/x86-stub.h | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index a0757a37b482b..784e1b2ae5ccd 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -void efi_adjust_memory_range_protection(unsigned long start, - unsigned long size) +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -225,13 +225,17 @@ void efi_adjust_memory_range_protection(unsigned long start, rounded_end = roundup(start + size, EFI_PAGE_SIZE); if (memattr != NULL) { - efi_call_proto(memattr, clear_memory_attributes, rounded_start, - rounded_end - rounded_start, EFI_MEMORY_XP); - return; + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_XP); + if (status != EFI_SUCCESS) + efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); + return status; } if (efi_dxe_table == NULL) - return; + return EFI_SUCCESS; /* * Don't modify memory region attributes, they are @@ -244,7 +248,7 @@ void efi_adjust_memory_range_protection(unsigned long start, status = efi_dxe_call(get_memory_space_descriptor, start, &desc); if (status != EFI_SUCCESS) - return; + break; next = desc.base_address + desc.length; @@ -269,8 +273,10 @@ void efi_adjust_memory_range_protection(unsigned long start, unprotect_start, unprotect_start + unprotect_size, status); + break; } } + return EFI_SUCCESS; } static efi_char16_t *efistub_fw_vendor(void) @@ -800,9 +806,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) *kernel_entry = addr + entry; - efi_adjust_memory_range_protection(addr, kernel_total_size); - - return EFI_SUCCESS; + return efi_adjust_memory_range_protection(addr, kernel_total_size); } static void __noreturn enter_kernel(unsigned long kernel_addr, diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h index 37c5a36b9d8cf..1c20e99a64944 100644 --- a/drivers/firmware/efi/libstub/x86-stub.h +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -5,8 +5,8 @@ extern void trampoline_32bit_src(void *, bool); extern const u16 trampoline_ljmp_imm_offset; -void efi_adjust_memory_range_protection(unsigned long start, - unsigned long size); +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); #ifdef CONFIG_X86_64 efi_status_t efi_setup_5level_paging(void); -- GitLab From 585a344af6bcac222608a158fc2830ff02712af5 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Wed, 24 Jan 2024 16:31:28 +0000 Subject: [PATCH 0685/2290] xen/events: close evtchn after mapping cleanup commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream. shutdown_pirq and startup_pirq are not taking the irq_mapping_update_lock because they can't due to lock inversion. Both are called with the irq_desc->lock being taking. The lock order, however, is first irq_mapping_update_lock and then irq_desc->lock. This opens multiple races: - shutdown_pirq can be interrupted by a function that allocates an event channel: CPU0 CPU1 shutdown_pirq { xen_evtchn_close(e) __startup_pirq { EVTCHNOP_bind_pirq -> returns just freed evtchn e set_evtchn_to_irq(e, irq) } xen_irq_info_cleanup() { set_evtchn_to_irq(e, -1) } } Assume here event channel e refers here to the same event channel number. After this race the evtchn_to_irq mapping for e is invalid (-1). - __startup_pirq races with __unbind_from_irq in a similar way. Because __startup_pirq doesn't take irq_mapping_update_lock it can grab the evtchn that __unbind_from_irq is currently freeing and cleaning up. In this case even though the event channel is allocated, its mapping can be unset in evtchn_to_irq. The fix is to first cleanup the mappings and then close the event channel. In this way, when an event channel gets allocated it's potential previous evtchn_to_irq mappings are guaranteed to be unset already. This is also the reverse order of the allocation where first the event channel is allocated and then the mappings are setup. On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal [un]bind interfaces"), we hit a BUG like the following during probing of NVMe devices. The issue is that during nvme_setup_io_queues, pci_free_irq is called for every device which results in a call to shutdown_pirq. With many nvme devices it's therefore likely to hit this race during boot because there will be multiple calls to shutdown_pirq and startup_pirq are running potentially in parallel. ------------[ cut here ]------------ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled kernel BUG at drivers/xen/events/events_base.c:499! invalid opcode: 0000 [#1] SMP PTI CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 Workqueue: nvme-reset-wq nvme_reset_work RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_trace_log_lvl+0x1c1/0x2d9 ? show_trace_log_lvl+0x1c1/0x2d9 ? set_affinity_irq+0xdc/0x1c0 ? __die_body.cold+0x8/0xd ? die+0x2b/0x50 ? do_trap+0x90/0x110 ? bind_evtchn_to_cpu+0xdf/0xf0 ? do_error_trap+0x65/0x80 ? bind_evtchn_to_cpu+0xdf/0xf0 ? exc_invalid_op+0x4e/0x70 ? bind_evtchn_to_cpu+0xdf/0xf0 ? asm_exc_invalid_op+0x12/0x20 ? bind_evtchn_to_cpu+0xdf/0xf0 ? bind_evtchn_to_cpu+0xc5/0xf0 set_affinity_irq+0xdc/0x1c0 irq_do_set_affinity+0x1d7/0x1f0 irq_setup_affinity+0xd6/0x1a0 irq_startup+0x8a/0xf0 __setup_irq+0x639/0x6d0 ? nvme_suspend+0x150/0x150 request_threaded_irq+0x10c/0x180 ? nvme_suspend+0x150/0x150 pci_request_irq+0xa8/0xf0 ? __blk_mq_free_request+0x74/0xa0 queue_request_irq+0x6f/0x80 nvme_create_queue+0x1af/0x200 nvme_create_io_queues+0xbd/0xf0 nvme_setup_io_queues+0x246/0x320 ? nvme_irq_check+0x30/0x30 nvme_reset_work+0x1c8/0x400 process_one_work+0x1b0/0x350 worker_thread+0x49/0x310 ? process_one_work+0x350/0x350 kthread+0x11b/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Modules linked in: ---[ end trace a11715de1eee1873 ]--- Fixes: d46a78b05c0e ("xen: implement pirq type event channels") Cc: stable@vger.kernel.org Co-debugged-by: Andrew Panyakin Signed-off-by: Maximilian Heyne Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de Signed-off-by: Juergen Gross Signed-off-by: Maximilian Heyne Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 00f8e349921d4..96b96516c9806 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq) unsigned int cpu = cpu_from_irq(irq); struct xenbus_device *dev; - xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; @@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq) } xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } xen_free_irq(irq); -- GitLab From 61adba85cc40287232a539e607164f273260e0fe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Mar 2024 14:45:20 +0000 Subject: [PATCH 0686/2290] Linux 6.1.81 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240304211556.993132804@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Ron Economos Tested-by: Salvatore Bonaccorso Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Mateusz Jończyk Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Yann Sionneau Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bc4adb561a7cf..e13df565a1cb6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 80 +SUBLEVEL = 81 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 850bb481890fa830f17e9e38db792931e0b147e4 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 19 Feb 2024 13:14:32 +0800 Subject: [PATCH 0687/2290] ceph: switch to corrected encoding of max_xattr_size in mdsmap [ Upstream commit 51d31149a88b5c5a8d2d33f06df93f6187a25b4c ] The addition of bal_rank_mask with encoding version 17 was merged into ceph.git in Oct 2022 and made it into v18.2.0 release normally. A few months later, the much delayed addition of max_xattr_size got merged, also with encoding version 17, placed before bal_rank_mask in the encoding -- but it didn't make v18.2.0 release. The way this ended up being resolved on the MDS side is that bal_rank_mask will continue to be encoded in version 17 while max_xattr_size is now encoded in version 18. This does mean that older kernels will misdecode version 17, but this is also true for v18.2.0 and v18.2.1 clients in userspace. The best we can do is backport this adjustment -- see ceph.git commit 78abfeaff27fee343fb664db633de5b221699a73 for details. [ idryomov: changelog ] Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/64440 Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size") Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Venky Shankar Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/mdsmap.c | 7 ++++--- include/linux/ceph/mdsmap.h | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 3fbabc98e1f70..4a089d70ebd07 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -379,10 +379,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648dc277..fcc95bff72a57 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,7 +25,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ -- GitLab From ea2a1052f23c5c113be7e46444984d96d4bf2687 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 28 Feb 2024 13:45:17 +0100 Subject: [PATCH 0688/2290] net: lan78xx: fix runtime PM count underflow on link stop [ Upstream commit 1eecc7ab82c42133b748e1895275942a054a7f67 ] Current driver has some asymmetry in the runtime PM calls. On lan78xx_open() it will call usb_autopm_get() and unconditionally usb_autopm_put(). And on lan78xx_stop() it will call only usb_autopm_put(). So far, it was working only because this driver do not activate autosuspend by default, so it was visible only by warning "Runtime PM usage count underflow!". Since, with current driver, we can't use runtime PM with active link, execute lan78xx_open()->usb_autopm_put() only in error case. Otherwise, keep ref counting high as long as interface is open. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Oleksij Rempel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 4fd4563811299..366e83ed0a973 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3137,7 +3137,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } -- GitLab From 6632e19acbdcf16603fdd632fcf20f3126d390a9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:51 +0100 Subject: [PATCH 0689/2290] ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able [ Upstream commit cbf996f52c4e658b3fb4349a869a62fd2d4c3c1c ] Currently routines that are supposed to toggle state of ring pair do not take care of associated interrupt with queue vector that these rings belong to. This causes funky issues such as dead interface due to irq misconfiguration, as per Pavel's report from Closes: tag. Add a function responsible for disabling single IRQ in EIMC register and call this as a very first thing when disabling ring pair during xsk_pool setup. For enable let's reuse ixgbe_irq_enable_queues(). Besides this, disable/enable NAPI as first/last thing when dealing with closing or opening ring pair that xsk_pool is being configured on. Reported-by: Pavel Vazharov Closes: https://lore.kernel.org/netdev/CAJEV1ijxNyPTwASJER1bcZzS9nMoZJqfR86nu_3jFFVXzZQ4NA@mail.gmail.com/ Fixes: 024aa5800f32 ("ixgbe: added Rx/Tx ring disable/enable functions") Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6dc554e810a17..086cc25730338 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2947,8 +2947,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10543,6 +10543,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10559,6 +10597,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10567,9 +10610,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10597,9 +10637,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10608,6 +10645,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** -- GitLab From 8e23edc54a5cb3d4ea881e424673c824255a3422 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:52 +0100 Subject: [PATCH 0690/2290] i40e: disable NAPI right after disabling irqs when handling xsk_pool [ Upstream commit d562b11c1eac7d73f4c778b4cbe5468f86b1f20d ] Disable NAPI before shutting down queues that this particular NAPI contains so that the order of actions in i40e_queue_pair_disable() mirrors what we do in i40e_queue_pair_enable(). Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 76455405a6d8e..d8a7fb21b7b76 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13569,9 +13569,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); -- GitLab From 5822c02707dac0a995437ef27ba1c441aaca0e1f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:53 +0100 Subject: [PATCH 0691/2290] ice: reorder disabling IRQ and NAPI in ice_qp_dis [ Upstream commit 99099c6bc75a30b76bb5d6774a0509ab6f06af05 ] ice_qp_dis() currently does things in very mixed way. Tx is stopped before disabling IRQ on related queue vector, then it takes care of disabling Rx and finally NAPI is disabled. Let us start with disabling IRQs in the first place followed by turning off NAPI. Then it is safe to handle queues. One subtle change on top of that is that even though ice_qp_ena() looks more sane, clear ICE_CFG_BUSY as the last thing there. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 41ee081eb8875..48cf24709fe32 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -171,6 +171,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -187,13 +191,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -256,11 +257,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); free_buf: kfree(qg_buf); return err; -- GitLab From 44faf8a48294fcb8c93daa7840249c2c4cd6e68e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 29 Feb 2024 14:34:44 -0500 Subject: [PATCH 0692/2290] tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51270d573a8d9dd5afdc7934de97d66c0e14b5fd ] I'm updating __assign_str() and will be removing the second parameter. To make sure that it does not break anything, I make sure that it matches the __string() field, as that is where the string is actually going to be saved in. To make sure there's nothing that breaks, I added a WARN_ON() to make sure that what was used in __string() is the same that is used in __assign_str(). In doing this change, an error was triggered as __assign_str() now expects the string passed in to be a char * value. I instead had the following warning: include/trace/events/qdisc.h: In function ‘trace_event_raw_event_qdisc_reset’: include/trace/events/qdisc.h:91:35: error: passing argument 1 of 'strcmp' from incompatible pointer type [-Werror=incompatible-pointer-types] 91 | __assign_str(dev, qdisc_dev(q)); That's because the qdisc_enqueue() and qdisc_reset() pass in qdisc_dev(q) to __assign_str() and to __string(). But that function returns a pointer to struct net_device and not a string. It appears that these events are just saving the pointer as a string and then reading it as a string as well. Use qdisc_dev(q)->name to save the device instead. Fixes: a34dac0b90552 ("net_sched: add tracepoints for qdisc_reset() and qdisc_destroy()") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/trace/events/qdisc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925cb057..1f4258308b967 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; -- GitLab From c0b22568a9d8384fd000cc49acb8f74bde40d1b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 13:11:52 +0000 Subject: [PATCH 0693/2290] geneve: make sure to pull inner header in geneve_rx() [ Upstream commit 1ca1ba465e55b9460e4e75dec9fff31e708fec74 ] syzbot triggered a bug in geneve_rx() [1] Issue is similar to the one I fixed in commit 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. [1] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in geneve_rx drivers/net/geneve.c:279 [inline] BUG: KMSAN: uninit-value in geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] geneve_rx drivers/net/geneve.c:279 [inline] geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 udp_queue_rcv_one_skb+0x1d39/0x1f20 net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x6ae/0x6e0 net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x184/0x4b0 net/ipv4/udp.c:2346 __udp4_lib_rcv+0x1c6b/0x3010 net/ipv4/udp.c:2422 udp_rcv+0x7d/0xa0 net/ipv4/udp.c:2604 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 process_backlog+0x480/0x8b0 net/core/dev.c:5976 __napi_poll+0xe3/0x980 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x8b8/0x1870 net/core/dev.c:6778 __do_softirq+0x1b7/0x7c5 kernel/softirq.c:553 do_softirq+0x9a/0xf0 kernel/softirq.c:454 __local_bh_enable_ip+0x9b/0xa0 kernel/softirq.c:381 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline] __dev_queue_xmit+0x2768/0x51c0 net/core/dev.c:4378 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x5cb/0xbc0 mm/slub.c:3903 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1296 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6394 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2783 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x70c2/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-and-tested-by: syzbot+6a1423ff3f97159aae64@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f393e454f45ca..3f8da6f0b25ce 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) -- GitLab From 0de693d68b0a18d5e256556c7c62d92cca35ad52 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Mar 2024 09:06:08 +0100 Subject: [PATCH 0694/2290] net: sparx5: Fix use after free inside sparx5_del_mact_entry [ Upstream commit 89d72d4125e94aa3c2140fedd97ce07ba9e37674 ] Based on the static analyzis of the code it looks like when an entry from the MAC table was removed, the entry was still used after being freed. More precise the vid of the mac_entry was used after calling devm_kfree on the mac_entry. The fix consists in first using the vid of the mac_entry to delete the entry from the HW and after that to free it. Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240301080608.3053468-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2a..75868b3f548ec 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); -- GitLab From 6293ff942e9ce72ae9fced8c277f078e97843dd4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jan 2024 13:51:58 -0800 Subject: [PATCH 0695/2290] ice: virtchnl: stop pretending to support RSS over AQ or registers [ Upstream commit 2652b99e43403dc464f3648483ffb38e48872fe4 ] The E800 series hardware uses the same iAVF driver as older devices, including the virtchnl negotiation scheme. This negotiation scheme includes a mechanism to determine what type of RSS should be supported, including RSS over PF virtchnl messages, RSS over firmware AdminQ messages, and RSS via direct register access. The PF driver will always prefer VIRTCHNL_VF_OFFLOAD_RSS_PF if its supported by the VF driver. However, if an older VF driver is loaded, it may request only VIRTCHNL_VF_OFFLOAD_RSS_REG or VIRTCHNL_VF_OFFLOAD_RSS_AQ. The ice driver happily agrees to support these methods. Unfortunately, the underlying hardware does not support these mechanisms. The E800 series VFs don't have the appropriate registers for RSS_REG. The mailbox queue used by VFs for VF to PF communication blocks messages which do not have the VF-to-PF opcode. Stop lying to the VF that it could support RSS over AdminQ or registers, as these interfaces do not work when the hardware is operating on an E800 series device. In practice this is unlikely to be hit by any normal user. The iAVF driver has supported RSS over PF virtchnl commands since 2016, and always defaults to using RSS_PF if possible. In principle, nothing actually stops the existing VF from attempting to access the registers or send an AQ command. However a properly coded VF will check the capability flags and will report a more useful error if it detects a case where the driver does not support the RSS offloads that it does. Fixes: 1071a8358a28 ("ice: Implement virtchnl commands for AVF support") Signed-off-by: Jacob Keller Reviewed-by: Alan Brady Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 +-------- drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c | 2 -- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 6c03ebf81ffda..4b71392f60df1 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5a82216e7d034..63e83e8b97e55 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING -- GitLab From afdd29726a6de4ba27cd15590661424c888dc596 Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Wed, 28 Feb 2024 18:54:48 +0300 Subject: [PATCH 0696/2290] net: ice: Fix potential NULL pointer dereference in ice_bridge_setlink() [ Upstream commit 06e456a05d669ca30b224b8ed962421770c1496c ] The function ice_bridge_setlink() may encounter a NULL pointer dereference if nlmsg_find_attr() returns NULL and br_spec is dereferenced subsequently in nla_for_each_nested(). To address this issue, add a check to ensure that br_spec is not NULL before proceeding with the nested attribute iteration. Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Signed-off-by: Rand Deeb Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ab46cfca4028d..3117f65253b37 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7681,6 +7681,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; -- GitLab From 63a3c1f3c9ecc654d851e7906d05334cd0c236e2 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Mon, 19 Feb 2024 10:08:43 +0100 Subject: [PATCH 0697/2290] igc: avoid returning frame twice in XDP_REDIRECT [ Upstream commit ef27f655b438bed4c83680e4f01e1cde2739854b ] When a frame can not be transmitted in XDP_REDIRECT (e.g. due to a full queue), it is necessary to free it by calling xdp_return_frame_rx_napi. However, this is the responsibility of the caller of the ndo_xdp_xmit (see for example bq_xmit_all in kernel/bpf/devmap.c) and thus calling it inside igc_xdp_xmit (which is the ndo_xdp_xmit of the igc driver) as well will lead to memory corruption. In fact, bq_xmit_all expects that it can return all frames after the last successfully transmitted one. Therefore, break for the first not transmitted frame, but do not call xdp_return_frame_rx_napi in igc_xdp_xmit. This is equally implemented in other Intel drivers such as the igb. There are two alternatives to this that were rejected: 1. Return num_frames as all the frames would have been transmitted and release them inside igc_xdp_xmit. While it might work technically, it is not what the return value is meant to represent (i.e. the number of SUCCESSFULLY transmitted packets). 2. Rework kernel/bpf/devmap.c and all drivers to support non-consecutively dropped packets. Besides being complex, it likely has a negative performance impact without a significant gain since it is anyway unlikely that the next frame can be transmitted if the previous one was dropped. The memory corruption can be reproduced with the following script which leads to a kernel panic after a few seconds. It basically generates more traffic than a i225 NIC can transmit and pushes it via XDP_REDIRECT from a virtual interface to the physical interface where frames get dropped. #!/bin/bash INTERFACE=enp4s0 INTERFACE_IDX=`cat /sys/class/net/$INTERFACE/ifindex` sudo ip link add dev veth1 type veth peer name veth2 sudo ip link set up $INTERFACE sudo ip link set up veth1 sudo ip link set up veth2 cat << EOF > redirect.bpf.c SEC("prog") int redirect(struct xdp_md *ctx) { return bpf_redirect($INTERFACE_IDX, 0); } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c redirect.bpf.c -o redirect.bpf.o sudo ip link set veth2 xdp obj redirect.bpf.o cat << EOF > pass.bpf.c SEC("prog") int pass(struct xdp_md *ctx) { return XDP_PASS; } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c pass.bpf.c -o pass.bpf.o sudo ip link set $INTERFACE xdp obj pass.bpf.o cat << EOF > trafgen.cfg { /* Ethernet Header */ 0xe8, 0x6a, 0x64, 0x41, 0xbf, 0x46, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 1, 1, # IP Src - adapt as needed 10, 0, 1, 2, # IP Dest - adapt as needed const16(6666), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } EOF sudo trafgen -i trafgen.cfg -b3000MB -o veth1 --cpp Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Signed-off-by: Florian Kauer Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4b6f882b380dc..e052f49cc08d7 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6330,7 +6330,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6346,16 +6346,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6363,7 +6362,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, -- GitLab From 394334fe2ae3b9f1e2332b873857e84cb28aac18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Mar 2024 14:48:00 +0000 Subject: [PATCH 0698/2290] net/ipv6: avoid possible UAF in ip6_route_mpath_notify() [ Upstream commit 685f7d531264599b3f167f1e94bbd22f120e5fab ] syzbot found another use-after-free in ip6_route_mpath_notify() [1] Commit f7225172f25a ("net/ipv6: prevent use after free in ip6_route_mpath_notify") was not able to fix the root cause. We need to defer the fib6_info_release() calls after ip6_route_mpath_notify(), in the cleanup phase. [1] BUG: KASAN: slab-use-after-free in rt6_fill_node+0x1460/0x1ac0 Read of size 4 at addr ffff88809a07fc64 by task syz-executor.2/23037 CPU: 0 PID: 23037 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-01035-gea7f3cfaa588 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x167/0x540 mm/kasan/report.c:488 kasan_report+0x142/0x180 mm/kasan/report.c:601 rt6_fill_node+0x1460/0x1ac0 inet6_rt_notify+0x13b/0x290 net/ipv6/route.c:6184 ip6_route_mpath_notify net/ipv6/route.c:5198 [inline] ip6_route_multipath_add net/ipv6/route.c:5404 [inline] inet6_rtm_newroute+0x1d0f/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f73dd87dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f73de6550c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f73dd9ac050 RCX: 00007f73dd87dda9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f73dd8ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f73dd9ac050 R15: 00007ffdbdeb7858 Allocated by task 23037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:372 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:389 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:3981 [inline] __kmalloc+0x22e/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] fib6_info_alloc+0x2e/0xf0 net/ipv6/ip6_fib.c:155 ip6_route_info_create+0x445/0x12b0 net/ipv6/route.c:3758 ip6_route_multipath_add net/ipv6/route.c:5298 [inline] inet6_rtm_newroute+0x744/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 Freed by task 16: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x4e/0x60 mm/kasan/generic.c:640 poison_slab_object+0xa6/0xe0 mm/kasan/common.c:241 __kasan_slab_free+0x34/0x70 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kfree+0x14a/0x380 mm/slub.c:4409 rcu_do_batch kernel/rcu/tree.c:2190 [inline] rcu_core+0xd76/0x1810 kernel/rcu/tree.c:2465 __do_softirq+0x2bb/0x942 kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xae/0x100 mm/kasan/generic.c:586 __call_rcu_common kernel/rcu/tree.c:2715 [inline] call_rcu+0x167/0xa80 kernel/rcu/tree.c:2829 fib6_info_release include/net/ip6_fib.h:341 [inline] ip6_route_multipath_add net/ipv6/route.c:5344 [inline] inet6_rtm_newroute+0x114d/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 The buggy address belongs to the object at ffff88809a07fc00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 100 bytes inside of freed 512-byte region [ffff88809a07fc00, ffff88809a07fe00) The buggy address belongs to the physical page: page:ffffea0002681f00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x9a07c head:ffffea0002681f00 order:2 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888014c41c80 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 23028, tgid 23027 (syz-executor.4), ts 2340253595219, free_ts 2339107097036 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311 __alloc_pages+0x255/0x680 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page+0x5f/0x160 mm/slub.c:2190 allocate_slab mm/slub.c:2354 [inline] new_slab+0x84/0x2f0 mm/slub.c:2407 ___slab_alloc+0xd17/0x13e0 mm/slub.c:3540 __slab_alloc mm/slub.c:3625 [inline] __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] __do_kmalloc_node mm/slub.c:3980 [inline] __kmalloc+0x2e0/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] new_dir fs/proc/proc_sysctl.c:956 [inline] get_subdir fs/proc/proc_sysctl.c:1000 [inline] sysctl_mkdir_p fs/proc/proc_sysctl.c:1295 [inline] __register_sysctl_table+0xb30/0x1440 fs/proc/proc_sysctl.c:1376 neigh_sysctl_register+0x416/0x500 net/core/neighbour.c:3859 devinet_sysctl_register+0xaf/0x1f0 net/ipv4/devinet.c:2644 inetdev_init+0x296/0x4d0 net/ipv4/devinet.c:286 inetdev_event+0x338/0x15c0 net/ipv4/devinet.c:1555 notifier_call_chain+0x18f/0x3b0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:1987 [inline] call_netdevice_notifiers net/core/dev.c:2001 [inline] register_netdevice+0x15b2/0x1a20 net/core/dev.c:10340 br_dev_newlink+0x27/0x100 net/bridge/br_netlink.c:1563 rtnl_newlink_create net/core/rtnetlink.c:3497 [inline] __rtnl_newlink net/core/rtnetlink.c:3717 [inline] rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3730 page last free pid 11583 tgid 11583 stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1140 [inline] free_unref_page_prepare+0x968/0xa90 mm/page_alloc.c:2346 free_unref_page+0x37/0x3f0 mm/page_alloc.c:2486 kasan_depopulate_vmalloc_pte+0x74/0x90 mm/kasan/shadow.c:415 apply_to_pte_range mm/memory.c:2619 [inline] apply_to_pmd_range mm/memory.c:2663 [inline] apply_to_pud_range mm/memory.c:2699 [inline] apply_to_p4d_range mm/memory.c:2735 [inline] __apply_to_page_range+0x8ec/0xe40 mm/memory.c:2769 kasan_release_vmalloc+0x9a/0xb0 mm/kasan/shadow.c:532 __purge_vmap_area_lazy+0x163f/0x1a10 mm/vmalloc.c:1770 drain_vmap_area_work+0x40/0xd0 mm/vmalloc.c:1804 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Memory state around the buggy address: ffff88809a07fb00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88809a07fb80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88809a07fc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809a07fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88809a07fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 3b1137fe7482 ("net: ipv6: Change notifications for multipath add to RTA_MULTIPATH") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240303144801.702646-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/route.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7f65dc750feb8..887599d351b8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5335,19 +5335,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5355,6 +5343,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5415,8 +5409,7 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } -- GitLab From 3420b3ff1ff489c177ea1cb7bd9fbbc4e9a0be95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 5 Mar 2024 22:31:32 +0100 Subject: [PATCH 0699/2290] cpumap: Zero-initialise xdp_rxq_info struct before running XDP program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2487007aa3b9fafbd2cb14068f49791ce1d7ede5 ] When running an XDP program that is attached to a cpumap entry, we don't initialise the xdp_rxq_info data structure being used in the xdp_buff that backs the XDP program invocation. Tobias noticed that this leads to random values being returned as the xdp_md->rx_queue_index value for XDP programs running in a cpumap. This means we're basically returning the contents of the uninitialised memory, which is bad. Fix this by zero-initialising the rxq data structure before running the XDP program. Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Reported-by: Tobias Böhm Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240305213132.11955-1-toke@redhat.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 08a8e81027289..0508937048137 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -222,7 +222,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; -- GitLab From 7faff12e828d9c643028259c0f7a2b411cd96607 Mon Sep 17 00:00:00 2001 From: "Tobias Jakobi (Compleo)" Date: Mon, 4 Mar 2024 16:41:35 +0100 Subject: [PATCH 0700/2290] net: dsa: microchip: fix register write order in ksz8_ind_write8() [ Upstream commit b7fb7729c94fb2d23c79ff44f7a2da089c92d81c ] This bug was noticed while re-implementing parts of the kernel driver in userspace using spidev. The goal was to enable some of the errata workarounds that Microchip describes in their errata sheet [1]. Both the errata sheet and the regular datasheet of e.g. the KSZ8795 imply that you need to do this for indirect register accesses: - write a 16-bit value to a control register pair (this value consists of the indirect register table, and the offset inside the table) - either read or write an 8-bit value from the data storage register (indicated by REG_IND_BYTE in the kernel) The current implementation has the order swapped. It can be proven, by reading back some indirect register with known content (the EEE register modified in ksz8_handle_global_errata() is one of these), that this implementation does not work. Private discussion with Oleksij Rempel of Pengutronix has revealed that the workaround was apparantly never tested on actual hardware. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ProductDocuments/Errata/KSZ87xx-Errata-DS80000687C.pdf Signed-off-by: Tobias Jakobi (Compleo) Reviewed-by: Oleksij Rempel Fixes: 7b6e6235b664 ("net: dsa: microchip: ksz8795: handle eee specif erratum") Link: https://lore.kernel.org/r/20240304154135.161332-1-tobias.jakobi.compleo@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/microchip/ksz8795.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index c63e082dc57dc..934600eccbaf2 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); -- GitLab From 998fd719e6d6468b930ac0c44552ea9ff8b07b80 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 5 Mar 2024 08:13:08 +0800 Subject: [PATCH 0701/2290] net/rds: fix WARNING in rds_conn_connect_if_down [ Upstream commit c055fc00c07be1f0df7375ab0036cebd1106ed38 ] If connection isn't established yet, get_mr() will fail, trigger connection after get_mr(). Fixes: 584a8279a44a ("RDS: RDMA: return appropriate error on rdma map failures") Reported-and-tested-by: syzbot+d4faee732755bba9838e@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/rdma.c | 3 +++ net/rds/send.c | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index fba82d36593ad..a4e3c5de998be 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 0c5504068e3c2..a4ba45c430d81 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1314,12 +1314,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", -- GitLab From bce83144ba7ec7dd231c13aa065bc6900efe34f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Mar 2024 13:38:15 +0100 Subject: [PATCH 0702/2290] netfilter: nft_ct: fix l3num expectations with inet pseudo family [ Upstream commit 99993789966a6eb4f1295193dc543686899892d3 ] Following is rejected but should be allowed: table inet t { ct expectation exp1 { [..] l3proto ip Valid combos are: table ip t, l3proto ip table ip6 t, l3proto ip6 table inet t, l3proto ip OR l3proto ip6 Disallow inet pseudeo family, the l3num must be a on-wire protocol known to conntrack. Retain NFPROTO_INET case to make it clear its rejected intentionally rather as oversight. Fixes: 8059918a1377 ("netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_ct.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 8df7564f0611e..2bfe3cdfbd581 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1237,14 +1237,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l3num) { case NFPROTO_IPV4: case NFPROTO_IPV6: - if (priv->l3num != ctx->family) - return -EINVAL; + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; - fallthrough; - case NFPROTO_INET: - break; + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ default: - return -EOPNOTSUPP; + return -EAFNOSUPPORT; } priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); -- GitLab From 39001e3c42000e7c2038717af0d33c32319ad591 Mon Sep 17 00:00:00 2001 From: Lena Wang Date: Tue, 5 Mar 2024 11:38:55 +0000 Subject: [PATCH 0703/2290] netfilter: nf_conntrack_h323: Add protection for bmp length out of range [ Upstream commit 767146637efc528b5e3d31297df115e85a2fd362 ] UBSAN load reports an exception of BRK#5515 SHIFT_ISSUE:Bitwise shifts that are out of bounds for their data type. vmlinux get_bitmap(b=75) + 712 vmlinux decode_seq(bs=0xFFFFFFD008037000, f=0xFFFFFFD008037018, level=134443100) + 1956 vmlinux decode_choice(base=0xFFFFFFD0080370F0, level=23843636) + 1216 vmlinux decode_seq(f=0xFFFFFFD0080371A8, level=134443500) + 812 vmlinux decode_choice(base=0xFFFFFFD008037280, level=0) + 1216 vmlinux DecodeRasMessage() + 304 vmlinux ras_help() + 684 vmlinux nf_confirm() + 188 Due to abnormal data in skb->data, the extension bitmap length exceeds 32 when decoding ras message then uses the length to make a shift operation. It will change into negative after several loop. UBSAN load could detect a negative shift as an undefined behaviour and reports exception. So we add the protection to avoid the length exceeding 32. Or else it will return out of range error and stop decoding. Fixes: 5e35941d9901 ("[NETFILTER]: Add H.323 conntrack/NAT helper") Signed-off-by: Lena Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_h323_asn1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b0018..540d97715bd23 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) -- GitLab From 6e49f3ac43e293f86dc0c630de2354a73eec914e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 6 Mar 2024 13:31:38 +0800 Subject: [PATCH 0704/2290] erofs: apply proper VMA alignment for memory mapped files on THP [ Upstream commit 4127caee89612a84adedd78c9453089138cd5afe ] There are mainly two reasons that thp_get_unmapped_area() should be used for EROFS as other filesystems: - It's needed to enable PMD mappings as a FSDAX filesystem, see commit 74d2fad1334d ("thp, dax: add thp_get_unmapped_area for pmd mappings"); - It's useful together with large folios and CONFIG_READ_ONLY_THP_FOR_FS which enable THPs for mmapped files (e.g. shared libraries) even without FSDAX. See commit 1854bc6e2420 ("mm/readahead: Align file mappings for non-DAX"). Fixes: 06252e9ce05b ("erofs: dax support for non-tailpacking regular file") Fixes: ce529cc25b18 ("erofs: enable large folios for iomap mode") Fixes: e6687b89225e ("erofs: enable large folios for fscache mode") Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240306053138.2240206-1-hsiangkao@linux.alibaba.com Signed-off-by: Sasha Levin --- fs/erofs/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index b32801d716f89..9d20e5d23ae0b 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -440,4 +440,5 @@ const struct file_operations erofs_file_fops = { .read_iter = erofs_file_read_iter, .mmap = erofs_file_mmap, .splice_read = generic_file_splice_read, + .get_unmapped_area = thp_get_unmapped_area, }; -- GitLab From dec82a8fc45c6ce494c2cb31f001a2aadb132b57 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:35 +0800 Subject: [PATCH 0705/2290] netrom: Fix a data-race around sysctl_netrom_default_path_quality [ Upstream commit 958d6145a6d9ba9e075c921aead8753fb91c9101 ] We need to protect the reader reading sysctl_netrom_default_path_quality because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd76ca5..6f709fdffc11f 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; -- GitLab From e439607291c082332e1e35baf8faf8552e6bcb4a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:36 +0800 Subject: [PATCH 0706/2290] netrom: Fix a data-race around sysctl_netrom_obsolescence_count_initialiser [ Upstream commit cfd9f4a740f772298308b2e6070d2c744fb5cf79 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6f709fdffc11f..b8ddd8048f352 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } -- GitLab From a47d68d777b41862757b7e3051f2d46d6e25f87b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:37 +0800 Subject: [PATCH 0707/2290] netrom: Fix data-races around sysctl_netrom_network_ttl_initialiser [ Upstream commit 119cae5ea3f9e35cdada8e572cc067f072fa825a ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_dev.c | 2 +- net/netrom/nr_out.c | 2 +- net/netrom/nr_subr.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22b387..2c34389c3ce6f 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f5b71..5e531394a724b 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924cff4..c3bbd5880850b 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; -- GitLab From fed835d415766a94fc0246dcebc3af4c03fe9941 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:38 +0800 Subject: [PATCH 0708/2290] netrom: Fix a data-race around sysctl_netrom_transport_timeout [ Upstream commit 60a7a152abd494ed4f69098cf0f322e6bb140612 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec5747969f964..3c6567af2ba47 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,7 +453,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = -- GitLab From d28fa5f0e6c1554e2829f73a6a276c9a49689d04 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:39 +0800 Subject: [PATCH 0709/2290] netrom: Fix a data-race around sysctl_netrom_transport_maximum_tries [ Upstream commit e799299aafed417cc1f32adccb2a0e5268b3f6d5 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3c6567af2ba47..be404ace98786 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -457,7 +457,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = msecs_to_jiffies(sysctl_netrom_transport_busy_delay); nr->idle = -- GitLab From 5deaef2bf56456c71b841e0dfde1bee2fd88c4eb Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:40 +0800 Subject: [PATCH 0710/2290] netrom: Fix a data-race around sysctl_netrom_transport_acknowledge_delay [ Upstream commit 806f462ba9029d41aadf8ec93f2f99c5305deada ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index be404ace98786..7428ea436e318 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -455,7 +455,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t1 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = -- GitLab From 5ac337138272d26d6d3d4f71bc5b1a87adf8b24d Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:41 +0800 Subject: [PATCH 0711/2290] netrom: Fix a data-race around sysctl_netrom_transport_busy_delay [ Upstream commit 43547d8699439a67b78d6bb39015113f7aa360fd ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7428ea436e318..ee6621c0d2e45 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -459,7 +459,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); nr->window = sysctl_netrom_transport_requested_window_size; -- GitLab From 46803b776d869b0c36041828a83c4f7da2dfa03b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:42 +0800 Subject: [PATCH 0712/2290] netrom: Fix a data-race around sysctl_netrom_transport_requested_window_size [ Upstream commit a2e706841488f474c06e9b33f71afc947fb3bf56 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ee6621c0d2e45..88941b66631fc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -462,7 +462,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; -- GitLab From 498f1d6da11ed6d736d655a2db14ee2d9569eecb Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:43 +0800 Subject: [PATCH 0713/2290] netrom: Fix a data-race around sysctl_netrom_transport_no_activity_timeout [ Upstream commit f99b494b40431f0ca416859f2345746199398e2b ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 88941b66631fc..5472e79cde830 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -461,7 +461,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t4 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; -- GitLab From 4c02b9ccbb11862ee39850b2b285664cd579b039 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:44 +0800 Subject: [PATCH 0714/2290] netrom: Fix a data-race around sysctl_netrom_routing_control [ Upstream commit b5dffcb8f71bdd02a4e5799985b51b12f4eeaf76 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b8ddd8048f352..89e12e6eea2ef 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ -- GitLab From cfedde3058bf976f2f292c0a236edd43afcdab57 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:45 +0800 Subject: [PATCH 0715/2290] netrom: Fix a data-race around sysctl_netrom_link_fails_count [ Upstream commit bc76645ebdd01be9b9994dac39685a3d0f6f7985 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 89e12e6eea2ef..70480869ad1c5 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } -- GitLab From 43464808669ba9d23996f0b6d875450191687caf Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:46 +0800 Subject: [PATCH 0716/2290] netrom: Fix data-races around sysctl_net_busy_read [ Upstream commit d380ce70058a4ccddc3e5f5c2063165dc07672c6 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5472e79cde830..f0879295de110 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. */ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2f084b6f69d7e..97944db6b5ac6 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; -- GitLab From 51c4435688ebedb498dbbf0732a9d5dff18d908d Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Mon, 9 Oct 2023 11:32:52 +0200 Subject: [PATCH 0717/2290] KVM: s390: add stat counter for shadow gmap events [ Upstream commit c3235e2dd6956448a562d6b1112205eeebc8ab43 ] The shadow gmap tracks memory of nested guests (guest-3). In certain scenarios, the shadow gmap needs to be rebuilt, which is a costly operation since it involves a SIE exit into guest-1 for every entry in the respective shadow level. Add kvm stat counters when new shadow structures are created at various levels. Also add a counter gmap_shadow_create when a completely fresh shadow gmap is created as well as a counter gmap_shadow_reuse when an existing gmap is being reused. Note that when several levels are shadowed at once, counters on all affected levels will be increased. Also note that not all page table levels need to be present and a ASCE can directly point to e.g. a segment table. In this case, a new segment table will always be equivalent to a new shadow gmap and hence will be counted as gmap_shadow_create and not as gmap_shadow_segment. Signed-off-by: Nico Boehr Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Signed-off-by: Janosch Frank Link: https://lore.kernel.org/r/20231009093304.2555344-2-nrb@linux.ibm.com Message-Id: <20231009093304.2555344-2-nrb@linux.ibm.com> Stable-dep-of: fe752331d4b3 ("KVM: s390: vsie: fix race during shadow creation") Signed-off-by: Sasha Levin --- arch/s390/include/asm/kvm_host.h | 7 +++++++ arch/s390/kvm/gaccess.c | 7 +++++++ arch/s390/kvm/kvm-s390.c | 9 ++++++++- arch/s390/kvm/vsie.c | 5 ++++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed152b..09abf000359f8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -777,6 +777,13 @@ struct kvm_vm_stat { u64 inject_service_signal; u64 inject_virtio; u64 aen_forward; + u64 gmap_shadow_create; + u64 gmap_shadow_reuse; + u64 gmap_shadow_r1_entry; + u64 gmap_shadow_r2_entry; + u64 gmap_shadow_r3_entry; + u64 gmap_shadow_sg_entry; + u64 gmap_shadow_pg_entry; }; struct kvm_arch_memory_slot { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d364..3beceff5f1c09 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, int *fake) { + struct kvm *kvm; struct gmap *parent; union asce asce; union vaddress vaddr; @@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, *fake = 0; *dat_protection = 0; + kvm = sg->private; parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; @@ -1341,6 +1343,7 @@ shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r1_entry++; } fallthrough; case ASCE_TYPE_REGION2: { @@ -1369,6 +1372,7 @@ shadow_r3t: rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r2_entry++; } fallthrough; case ASCE_TYPE_REGION3: { @@ -1406,6 +1410,7 @@ shadow_sgt: rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r3_entry++; } fallthrough; case ASCE_TYPE_SEGMENT: { @@ -1439,6 +1444,7 @@ shadow_pgt: rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_sg_entry++; } } /* Return the parent address of the page table */ @@ -1509,6 +1515,7 @@ shadow_page: pte.p |= dat_protection; if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); + vcpu->kvm->stat.gmap_shadow_pg_entry++; ipte_unlock(vcpu->kvm); mmap_read_unlock(sg->mm); return rc; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f604946ab2c85..348d49268a7ec 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_pfault_done), STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio), - STATS_DESC_COUNTER(VM, aen_forward) + STATS_DESC_COUNTER(VM, aen_forward), + STATS_DESC_COUNTER(VM, gmap_shadow_reuse), + STATS_DESC_COUNTER(VM, gmap_shadow_create), + STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry), }; const struct kvm_stats_header kvm_vm_stats_header = { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 740f8b56e63f9..b2dbf08a961e5 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1206,8 +1206,10 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, * we're holding has been unshadowed. If the gmap is still valid, * we can safely reuse it. */ - if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) + if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) { + vcpu->kvm->stat.gmap_shadow_reuse++; return 0; + } /* release the old shadow - if any, and mark the prefix as unmapped */ release_gmap_shadow(vsie_page); @@ -1215,6 +1217,7 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, if (IS_ERR(gmap)) return PTR_ERR(gmap); gmap->private = vcpu->kvm; + vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; } -- GitLab From 5df3b81a567eb565029563f26f374ae3803a1dfc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 20 Dec 2023 13:53:17 +0100 Subject: [PATCH 0718/2290] KVM: s390: vsie: fix race during shadow creation [ Upstream commit fe752331d4b361d43cfd0b89534b4b2176057c32 ] Right now it is possible to see gmap->private being zero in kvm_s390_vsie_gmap_notifier resulting in a crash. This is due to the fact that we add gmap->private == kvm after creation: static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { [...] gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); gmap->private = vcpu->kvm; Let children inherit the private field of the parent. Reported-by: Marc Hartmayer Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Cc: Cc: David Hildenbrand Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Signed-off-by: Christian Borntraeger Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com Signed-off-by: Sasha Levin --- arch/s390/kvm/vsie.c | 1 - arch/s390/mm/gmap.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index b2dbf08a961e5..d90c818a9ae71 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1216,7 +1216,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); - gmap->private = vcpu->kvm; vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 243f673fa6515..662cf23a1b44b 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, return ERR_PTR(-ENOMEM); new->mm = parent->mm; new->parent = gmap_get(parent); + new->private = parent->private; new->orig_asce = asce; new->edat_level = edat_level; new->initialized = false; -- GitLab From 35a0d43cee095e590982c290fde6dabef0623769 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jan 2024 10:11:30 +0100 Subject: [PATCH 0719/2290] ASoC: codecs: wcd938x: fix headphones volume controls [ Upstream commit 4d0e8bdfa4a57099dc7230952a460903f2e2f8de ] The lowest headphones volume setting does not mute so the leave the TLV mute flag unset. This is specifically needed to let the sound server use the lowest gain setting. Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR") Cc: # 6.5 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index e80be4e4fa8b4..555b74e7172d8 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -210,7 +210,7 @@ struct wcd938x_priv { }; static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); -static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); +static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); struct wcd938x_mbhc_zdet_param { -- GitLab From 66d663da8654099591a3aa78cf92d48410e930f7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 17 Jan 2024 08:41:52 +0530 Subject: [PATCH 0720/2290] drm/amd/display: Fix uninitialized variable usage in core_link_ 'read_dpcd() & write_dpcd()' functions [ Upstream commit a58371d632ebab9ea63f10893a6b6731196b6f8d ] The 'status' variable in 'core_link_read_dpcd()' & 'core_link_write_dpcd()' was uninitialized. Thus, initializing 'status' variable to 'DC_ERROR_UNEXPECTED' by default. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'. drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'. Cc: stable@vger.kernel.org Cc: Jerry Zuo Cc: Jun Lei Cc: Wayne Lin Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c index af110bf9470fa..aefca9756dbe8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c @@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); -- GitLab From 4e2f0cae0bfe60d99e733cddf98de017a482975f Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:08 +0200 Subject: [PATCH 0721/2290] nfp: flower: add goto_chain_index for ct entry [ Upstream commit 3e44d19934b92398785b3ffc2353b9eba264140e ] The chain_index has different means in pre ct entry and post ct entry. In pre ct entry, it means chain index, but in post ct entry, it means goto chain index, it is confused. chain_index and goto_chain_index may be present in one flow rule, It cannot be distinguished by one field chain_index, both chain_index and goto_chain_index are required in the follow-up patch to support multiple ct zones Another field goto_chain_index is added to record the goto chain index. If no goto action in post ct entry, goto_chain_index is 0. Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski Stable-dep-of: cefa98e806fd ("nfp: flower: add hardware offload check for post ct entry") Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 8 ++++++-- drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 7af03b45555dd..da7a47416a208 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, /* Checks that the chain_index of the filter matches the * chain_index of the GOTO action. */ - if (post_ct_entry->chain_index != pre_ct_entry->chain_index) + if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) return -EINVAL; err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); @@ -1776,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, if (IS_ERR(ct_entry)) return PTR_ERR(ct_entry); ct_entry->type = CT_TYPE_PRE_CT; - ct_entry->chain_index = ct_goto->chain_index; + ct_entry->chain_index = flow->common.chain_index; + ct_entry->goto_chain_index = ct_goto->chain_index; list_add(&ct_entry->list_node, &zt->pre_ct_list); zt->pre_ct_count++; @@ -1799,6 +1800,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, struct nfp_fl_ct_zone_entry *zt; bool wildcarded = false; struct flow_match_ct ct; + struct flow_action_entry *ct_goto; flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { @@ -1823,6 +1825,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, ct_entry->type = CT_TYPE_POST_CT; ct_entry->chain_index = flow->common.chain_index; + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; list_add(&ct_entry->list_node, &zt->post_ct_list); zt->post_ct_count++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 762c0b36e269b..9440ab776ecea 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -112,6 +112,7 @@ enum nfp_nfp_layer_name { * @cookie: Flow cookie, same as original TC flow, used as key * @list_node: Used by the list * @chain_index: Chain index of the original flow + * @goto_chain_index: goto chain index of the flow * @netdev: netdev structure. * @type: Type of pre-entry from enum ct_entry_type * @zt: Reference to the zone table this belongs to @@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry { unsigned long cookie; struct list_head list_node; u32 chain_index; + u32 goto_chain_index; enum ct_entry_type type; struct net_device *netdev; struct nfp_fl_ct_zone_entry *zt; -- GitLab From b0b89b470a863fb84360a89365ee69612d2863b5 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:08 +0200 Subject: [PATCH 0722/2290] nfp: flower: add hardware offload check for post ct entry [ Upstream commit cefa98e806fd4e2a5e2047457a11ae5f17b8f621 ] The nfp offload flow pay will not allocate a mask id when the out port is openvswitch internal port. This is because these flows are used to configure the pre_tun table and are never actually send to the firmware as an add-flow message. When a tc rule which action contains ct and the post ct entry's out port is openvswitch internal port, the merge offload flow pay with the wrong mask id of 0 will be send to the firmware. Actually, the nfp can not support hardware offload for this situation, so return EOPNOTSUPP. Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows") CC: stable@vger.kernel.org # 5.14+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index da7a47416a208..497766ecdd91d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1797,10 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; - struct flow_action_entry *ct_goto; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { -- GitLab From f0c349708290f2632d3103b687c7998945ed4dff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Jan 2024 09:58:39 +0100 Subject: [PATCH 0723/2290] readahead: avoid multiple marked readahead pages [ Upstream commit ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 ] ra_alloc_folio() marks a page that should trigger next round of async readahead. However it rounds up computed index to the order of page being allocated. This can however lead to multiple consecutive pages being marked with readahead flag. Consider situation with index == 1, mark == 1, order == 0. We insert order 0 page at index 1 and mark it. Then we bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page at index 2 is marked as well. Then we bump order to 2, index is incremented to 4, mark gets rounded to 4 so page at index 4 is marked as well. The fact that multiple pages get marked within a single readahead window confuses the readahead logic and results in readahead window being trimmed back to 1. This situation is triggered in particular when maximum readahead window size is not a power of two (in the observed case it was 768 KB) and as a result sequential read throughput suffers. Fix the problem by rounding 'mark' down instead of up. Because the index is naturally aligned to 'order', we are guaranteed 'rounded mark' == index iff 'mark' is within the page we are allocating at 'index' and thus exactly one page is marked with readahead flag as required by the readahead code and sequential read performance is restored. This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios"). The commit changed the rounding with the rationale: "... we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read." Although this is true, the fact is this was always the case with read sizes not aligned to folio boundaries and large folios in the page cache just make the situation more obvious (and frequent). Also for sequential read workloads it is better to trigger the readahead earlier rather than later. It is true that the difference in the rounding and thus earlier triggering of the readahead can result in reading more for semi-random workloads. However workloads really suffering from this seem to be rare. In particular I have verified that the workload described in commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading random 100k blocks from a file like: [reader] bs=100k rw=randread numjobs=1 size=64g runtime=60s is not impacted by the rounding change and achieves ~70MB/s in both cases. [jack@suse.cz: fix one more place where mark rounding was done as well] Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") Signed-off-by: Jan Kara Cc: Matthew Wilcox Cc: Guo Xuenan Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/readahead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index ba43428043a35..e4b772bb70e68 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -483,7 +483,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -591,7 +591,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; -- GitLab From 02e16a41e5439e447052c3f9a66489caf8bb4007 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Jan 2024 14:04:54 +0500 Subject: [PATCH 0724/2290] selftests/mm: switch to bash from sh [ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ] Running charge_reserved_hugetlb.sh generates errors if sh is set to dash: ./charge_reserved_hugetlb.sh: 9: [[: not found ./charge_reserved_hugetlb.sh: 19: [[: not found ./charge_reserved_hugetlb.sh: 27: [[: not found ./charge_reserved_hugetlb.sh: 37: [[: not found ./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected Switch to using /bin/bash instead of /bin/sh. Make the switch for write_hugetlb_memory.sh as well which is called from charge_reserved_hugetlb.sh. Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: David Laight Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index 0899019a7fcb4..e14bdd4455f2d 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index 70a02301f4c27..3d2d2eb9d6fff 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e -- GitLab From a584c7734a4dd050451fcdd65c66317e15660e81 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 19 Jan 2024 06:14:29 -0700 Subject: [PATCH 0725/2290] selftests: mm: fix map_hugetlb failure on 64K page size systems [ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ] On systems with 64k page size and 512M huge page sizes, the allocation and test succeeds but errors out at the munmap. As the comment states, munmap will failure if its not HUGEPAGE aligned. This is due to the length of the mapping being 1/2 the size of the hugepage causing the munmap to not be hugepage aligned. Fix this by making the mapping length the full hugepage if the hugepage is larger than the length of the mapping. Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com Signed-off-by: Nico Pache Cc: Donet Tom Cc: Shuah Khan Cc: Christophe Leroy Cc: Michael Ellerman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 312889edb84ab..c65c55b7a789f 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -70,10 +71,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { -- GitLab From 9158ea9395c12b5623b569916b6fba0171595542 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:36 +0200 Subject: [PATCH 0726/2290] xhci: process isoc TD properly when there was a transaction error mid TD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ] The last TRB of a isoc TD might not trigger an event if there was an error event for a TRB mid TD. This is seen on a NEC Corporation uPD720200 USB 3.0 Host After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 generate events for passed TRBs with IOC flag set if it proceeds to the next TD. This event is either a copy of the original error, or a "success" transfer event. If that event is missing then the driver and xHC host get out of sync as the driver is still expecting a transfer event for that first TD, while xHC host is already sending events for the next TD in the list. This leads to "Transfer event TRB DMA ptr not part of current TD" messages. As a solution we tag the isoc TDs that get error events mid TD. If an event doesn't match the first TD, then check if the tag is set, and event points to the next TD. In that case give back the fist TD and process the next TD normally Make sure TD status and transferred length stay valid in both cases with and without final TD completion event. Reported-by: Michał Pecio Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ Tested-by: Michał Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- drivers/usb/host/xhci.h | 1 + 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1239e06dfe411..e4441a71368e5 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2363,6 +2363,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2388,8 +2391,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2409,6 +2413,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2417,6 +2424,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2801,17 +2816,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2823,9 +2872,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1354310cb37b1..fc25a5b09710c 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1570,6 +1570,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; -- GitLab From 2e3ec80ea7ba58bbb210e83b5a0afefee7c171d3 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 25 Jan 2024 17:27:37 +0200 Subject: [PATCH 0727/2290] xhci: handle isoc Babble and Buffer Overrun events properly [ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ] xHCI 4.9 explicitly forbids assuming that the xHC has released its ownership of a multi-TRB TD when it reports an error on one of the early TRBs. Yet the driver makes such assumption and releases the TD, allowing the remaining TRBs to be freed or overwritten by new TDs. The xHC should also report completion of the final TRB due to its IOC flag being set by us, regardless of prior errors. This event cannot be recognized if the TD has already been freed earlier, resulting in "Transfer event TRB DMA ptr not part of current TD" error message. Fix this by reusing the logic for processing isoc Transaction Errors. This also handles hosts which fail to report the final completion. Fix transfer length reporting on Babble errors. They may be caused by device malfunction, no guarantee that the buffer has been filled. Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e4441a71368e5..239b5edee3268 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2381,9 +2381,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: -- GitLab From a28f4d1e0bed85943d309ac243fd1c200f8af9a2 Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Tue, 23 Jan 2024 12:52:03 +0100 Subject: [PATCH 0728/2290] drm/amdgpu: Reset IH OVERFLOW_CLEAR bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7330256268664ea0a7dd5b07a3fed363093477dd ] Allows us to detect subsequent IH ring buffer overflows as well. Cc: Joshua Ashton Cc: Alex Deucher Cc: Christian König Cc: stable@vger.kernel.org Signed-off-by: Friedrich Vock Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/cz_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/si_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 ++++++ 9 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index df385ffc97683..6578ca1b90afa 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37ad..c19681492efa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a6..2c02ae69883d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 7cd79a3844b24..657e4ca6f9dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index eec13cb5bf758..84e8e8b008ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a57502..cada9f300a7f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index b08905d1c00f0..07a5d95be07f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 1e83db0c5438d..74c94df423455 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 59dfca093155c..f1ba76c35cd6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } -- GitLab From 8b5760939db9c49c03b9e19f6c485a8812f48d83 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0729/2290] x86/mmio: Disable KVM mitigation when X86_FEATURE_CLEAR_CPU_BUF is set commit e95df4ec0c0c9791941f112db699fae794b9862a upstream. Currently MMIO Stale Data mitigation for CPUs not affected by MDS/TAA is to only deploy VERW at VMentry by enabling mmio_stale_data_clear static branch. No mitigation is needed for kernel->user transitions. If such CPUs are also affected by RFDS, its mitigation may set X86_FEATURE_CLEAR_CPU_BUF to deploy VERW at kernel->user and VMentry. This could result in duplicate VERW at VMentry. Fix this by disabling mmio_stale_data_clear static branch when X86_FEATURE_CLEAR_CPU_BUF is enabled. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d1895930e6eb8..c66f6eb40afb1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -421,6 +421,13 @@ static void __init mmio_select_mitigation(void) if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + /* + * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based + * mitigations, disable KVM-only mitigation in that case. + */ + if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + static_branch_disable(&mmio_stale_data_clear); else static_branch_enable(&mmio_stale_data_clear); @@ -497,8 +504,11 @@ static void __init md_clear_update_mitigation(void) taa_mitigation = TAA_MITIGATION_VERW; taa_select_mitigation(); } - if (mmio_mitigation == MMIO_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + /* + * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear + * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state. + */ + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } -- GitLab From 29476fac750dddeabc3503bf9b13e05b949d7adb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0730/2290] Documentation/hw-vuln: Add documentation for RFDS commit 4e42765d1be01111df0c0275bbaf1db1acef346e upstream. Add the documentation for transient execution vulnerability Register File Data Sampling (RFDS) that affects Intel Atom CPUs. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../hw-vuln/reg-file-data-sampling.rst | 104 ++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 6828102baaa7a..3e4a14e38b49e 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -21,3 +21,4 @@ are configurable at compile, boot or run time. cross-thread-rsb.rst gather_data_sampling.rst srso + reg-file-data-sampling diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst new file mode 100644 index 0000000000000..0585d02b9a6cb --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -0,0 +1,104 @@ +================================== +Register File Data Sampling (RFDS) +================================== + +Register File Data Sampling (RFDS) is a microarchitectural vulnerability that +only affects Intel Atom parts(also branded as E-cores). RFDS may allow +a malicious actor to infer data values previously used in floating point +registers, vector registers, or integer registers. RFDS does not provide the +ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS. + +Affected Processors +=================== +Below is the list of affected Intel processors [#f1]_: + + =================== ============ + Common name Family_Model + =================== ============ + ATOM_GOLDMONT 06_5CH + ATOM_GOLDMONT_D 06_5FH + ATOM_GOLDMONT_PLUS 06_7AH + ATOM_TREMONT_D 06_86H + ATOM_TREMONT 06_96H + ALDERLAKE 06_97H + ALDERLAKE_L 06_9AH + ATOM_TREMONT_L 06_9CH + RAPTORLAKE 06_B7H + RAPTORLAKE_P 06_BAH + ATOM_GRACEMONT 06_BEH + RAPTORLAKE_S 06_BFH + =================== ============ + +As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and +RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as +vulnerable in Linux because they share the same family/model with an affected +part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or +CPUID.HYBRID. This information could be used to distinguish between the +affected and unaffected parts, but it is deemed not worth adding complexity as +the reporting is fixed automatically when these parts enumerate RFDS_NO. + +Mitigation +========== +Intel released a microcode update that enables software to clear sensitive +information using the VERW instruction. Like MDS, RFDS deploys the same +mitigation strategy to force the CPU to clear the affected buffers before an +attacker can extract the secrets. This is achieved by using the otherwise +unused and obsolete VERW instruction in combination with a microcode update. +The microcode clears the affected CPU buffers when the VERW instruction is +executed. + +Mitigation points +----------------- +VERW is executed by the kernel before returning to user space, and by KVM +before VMentry. None of the affected cores support SMT, so VERW is not required +at C-state transitions. + +New bits in IA32_ARCH_CAPABILITIES +---------------------------------- +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +vulnerability and mitigation capability: + +- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS. +- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the + microcode that clears the affected buffers on VERW execution. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control RFDS mitigation at boot time with the +parameter "reg_file_data_sampling=". The valid arguments are: + + ========== ================================================================= + on If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and before entering a VM. + off Disables mitigation. + ========== ================================================================= + +Mitigation default is selected by CONFIG_MITIGATION_RFDS. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: No microcode' + - The processor is vulnerable but microcode is not updated. + * - 'Mitigation: Clear Register File' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html -- GitLab From d405b9c03f06b1b5e73ebc4f34452687022f7029 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0731/2290] x86/rfds: Mitigate Register File Data Sampling (RFDS) commit 8076fcde016c9c0e0660543e67bff86cb48a7c9c upstream. RFDS is a CPU vulnerability that may allow userspace to infer kernel stale data previously used in floating point registers, vector registers and integer registers. RFDS only affects certain Intel Atom processors. Intel released a microcode update that uses VERW instruction to clear the affected CPU buffers. Unlike MDS, none of the affected cores support SMT. Add RFDS bug infrastructure and enable the VERW based mitigation by default, that clears the affected buffers just before exiting to userspace. Also add sysfs reporting and cmdline parameter "reg_file_data_sampling" to control the mitigation. For details see: Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + .../admin-guide/kernel-parameters.txt | 21 +++++ arch/x86/Kconfig | 11 +++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 8 ++ arch/x86/kernel/cpu/bugs.c | 78 ++++++++++++++++++- arch/x86/kernel/cpu/common.c | 38 ++++++++- drivers/base/cpu.c | 8 ++ include/linux/cpu.h | 2 + 9 files changed, 162 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 13c01b641dc70..78c26280c473b 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -519,6 +519,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling /sys/devices/system/cpu/vulnerabilities/retbleed /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/spectre_v1 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4ad60e127e048..2dfe75104e7de 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1107,6 +1107,26 @@ The filter can be disabled or changed to another driver later using sysfs. + reg_file_data_sampling= + [X86] Controls mitigation for Register File Data + Sampling (RFDS) vulnerability. RFDS is a CPU + vulnerability which may allow userspace to infer + kernel data values previously stored in floating point + registers, vector registers, or integer registers. + RFDS only affects Intel Atom processors. + + on: Turns ON the mitigation. + off: Turns OFF the mitigation. + + This parameter overrides the compile time default set + by CONFIG_MITIGATION_RFDS. Mitigation cannot be + disabled when other VERW based mitigations (like MDS) + are enabled. In order to disable RFDS mitigation all + VERW based mitigations need to be disabled. + + For details see: + Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + driver_async_probe= [KNL] List of driver names to be probed asynchronously. * matches with all driver names. If * is specified, the @@ -3262,6 +3282,7 @@ nospectre_bhb [ARM64] nospectre_v1 [X86,PPC] nospectre_v2 [X86,PPC,S390,ARM64] + reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] spectre_v2_user=off [X86] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2f7af61b49b6c..5caa023e98397 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2565,6 +2565,17 @@ config GDS_FORCE_MITIGATION If in doubt, say N. +config MITIGATION_RFDS + bool "RFDS Mitigation" + depends on CPU_SUP_INTEL + default y + help + Enable mitigation for Register File Data Sampling (RFDS) by default. + RFDS is a hardware vulnerability which affects Intel Atom CPUs. It + allows unprivileged speculative access to stale data previously + stored in floating point, vector and integer registers. + See also + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b60f24b30cb90..b97a70aa4de90 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -477,4 +477,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec955ab2ff034..005e41dc7ee5a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -168,6 +168,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c66f6eb40afb1..c68789fdc123b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -479,6 +479,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) } early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Register File Data Sampling: " fmt + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; + +static const char * const rfds_strings[] = { + [RFDS_MITIGATION_OFF] = "Vulnerable", + [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", + [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", +}; + +static void __init rfds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + rfds_mitigation = RFDS_MITIGATION_OFF; + return; + } + if (rfds_mitigation == RFDS_MITIGATION_OFF) + return; + + if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; +} + +static __init int rfds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + return 0; + + if (!strcmp(str, "off")) + rfds_mitigation = RFDS_MITIGATION_OFF; + else if (!strcmp(str, "on")) + rfds_mitigation = RFDS_MITIGATION_VERW; + + return 0; +} +early_param("reg_file_data_sampling", rfds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "" fmt @@ -512,6 +563,11 @@ static void __init md_clear_update_mitigation(void) mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } + if (rfds_mitigation == RFDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_RFDS)) { + rfds_mitigation = RFDS_MITIGATION_VERW; + rfds_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); @@ -521,6 +577,8 @@ out: pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + if (boot_cpu_has_bug(X86_BUG_RFDS)) + pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } static void __init md_clear_select_mitigation(void) @@ -528,11 +586,12 @@ static void __init md_clear_select_mitigation(void) mds_select_mitigation(); taa_select_mitigation(); mmio_select_mitigation(); + rfds_select_mitigation(); /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. + * As these mitigations are inter-related and rely on VERW instruction + * to clear the microarchitural buffers, update and print their status + * after mitigation selection is done for each of these vulnerabilities. */ md_clear_update_mitigation(); } @@ -2596,6 +2655,11 @@ static ssize_t mmio_stale_data_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t rfds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) @@ -2757,6 +2821,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_RFDS: + return rfds_show_state(buf); + default: break; } @@ -2831,4 +2898,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 454cdf3418624..758938c94b41e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1248,6 +1248,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRSO BIT(5) /* CPU is affected by GDS */ #define GDS BIT(6) +/* CPU is affected by Register File Data Sampling */ +#define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1275,9 +1277,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1311,6 +1322,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap) ia32_cap & ARCH_CAP_SBDR_SSDP_NO); } +static bool __init vulnerable_to_rfds(u64 ia32_cap) +{ + /* The "immunity" bit trumps everything else: */ + if (ia32_cap & ARCH_CAP_RFDS_NO) + return false; + + /* + * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to + * indicate that mitigation is needed because guest is running on a + * vulnerable hardware or may migrate to such hardware: + */ + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + return true; + + /* Only consult the blacklist when there is no enumeration: */ + return cpu_matches(cpu_vuln_blacklist, RFDS); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1419,6 +1448,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index dab70a65377c8..31da94afe4f3d 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -589,6 +589,12 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -602,6 +608,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -617,6 +624,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_reg_file_data_sampling.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 008bfa68cfabc..4b06b1f1e267a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- GitLab From b2e92ab17e440a97c716b701ecd897eebca11ac0 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 12:29:43 -0700 Subject: [PATCH 0732/2290] KVM/x86: Export RFDS_NO and RFDS_CLEAR to guests commit 2a0180129d726a4b953232175857d442651b55a0 upstream. Mitigation for RFDS requires RFDS_CLEAR capability which is enumerated by MSR_IA32_ARCH_CAPABILITIES bit 27. If the host has it set, export it to guests so that they can deploy the mitigation. RFDS_NO indicates that the system is not vulnerable to RFDS, export it to guests so that they don't deploy the mitigation unnecessarily. When the host is not affected by X86_BUG_RFDS, but has RFDS_NO=0, synthesize RFDS_NO to the guest. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7144e51668136..688bc7b72eb66 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1613,7 +1613,8 @@ static unsigned int num_msr_based_features; ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) static u64 kvm_get_arch_capabilities(void) { @@ -1650,6 +1651,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_SSB_NO; if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + data |= ARCH_CAP_RFDS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* -- GitLab From 92cdc9d71ab084788afd1fcefa315dd33deba068 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:51 +0100 Subject: [PATCH 0733/2290] selftests: mptcp: decrease BW in simult flows [ Upstream commit 5e2f3c65af47e527ccac54060cf909e3306652ff ] When running the simult_flow selftest in slow environments -- e.g. QEmu without KVM support --, the results can be unstable. This selftest checks if the aggregated bandwidth is (almost) fully used as expected. To help improving the stability while still keeping the same validation in place, the BW and the delay are reduced to lower the pressure on the CPU. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 4a417f9d51d67..06ad0510469e3 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -301,10 +301,10 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret -- GitLab From f0b6dc034e1797b0bc91558c0c2f90c8a12b5533 Mon Sep 17 00:00:00 2001 From: Ma Hanghong Date: Thu, 20 Oct 2022 11:46:56 -0400 Subject: [PATCH 0734/2290] drm/amd/display: Wrong colorimetry workaround [ Upstream commit b1a98cf89a695d36c414653634ea7ba91b6e701f ] [Why] For FreeSync HDR, native color space flag in AMD VSIF(BT.709) should be used when intepreting content and color space flag in VSC or AVI infoFrame should be ignored. However, it turned out some userspace application still use color flag in VSC or AVI infoFrame which is incorrect. [How] Transfer function is used when building the VSC and AVI infoFrame. Set colorimetry to BT.709 when all the following match: 1. Pixel format is YCbCr; 2. In FreeSync 2 HDR, color is COLOR_SPACE_2020_YCBCR; 3. Transfer function is TRANSFER_FUNC_GAMMA_22; Tested-by: Mark Broadworth Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Ma Hanghong Signed-off-by: Alex Deucher Stable-dep-of: e6a7df96facd ("drm/amd/display: Fix MST Null Ptr for RV") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++++++ drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h | 3 ++- .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 6 +++++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index da16048bf1004..bea49befdcacc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5938,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false; int mode_refresh; int preferred_refresh = 0; + enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif @@ -6071,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) stream->use_vsc_sdp_for_colorimetry = true; } - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space); + if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) + tf = TRANSFER_FUNC_GAMMA_22; + mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 66923f51037a3..e2f80cd0ca8cb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3038,6 +3038,12 @@ static void set_avi_info_frame( hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } + if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && + stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { + hdmi_info.bits.EC0_EC2 = 0; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; + } + /* TODO: un-hardcode aspect ratio */ aspect = stream->timing.aspect_ratio; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 1d8b746b02f24..edf5845f6a1f7 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -35,7 +35,8 @@ struct mod_vrr_params; void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs); + enum dc_color_space cs, + enum color_transfer_func tf); void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 27ceba9d6d658..69691058ab898 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -132,7 +132,8 @@ enum ColorimetryYCCDP { void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs) + enum dc_color_space cs, + enum color_transfer_func tf) { unsigned int vsc_packet_revision = vsc_packet_undefined; unsigned int i; @@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; else if (cs == COLOR_SPACE_2020_YCBCR) colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; + + if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) + colorimetryFormat = ColorimetryYCC_DP_ITU709; break; default: -- GitLab From 01d992088dce3945f70f49f34b0b911c5213c238 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 22 Jan 2024 13:43:46 -0500 Subject: [PATCH 0735/2290] drm/amd/display: Fix MST Null Ptr for RV [ Upstream commit e6a7df96facdcf5b1f71eb3ec26f2f9f6ad61e57 ] The change try to fix below error specific to RV platform: BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 ? _copy_to_user+0x25/0x30 ? drm_ioctl+0x296/0x4b0 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 drm_ioctl_kernel+0xcd/0x170 drm_ioctl+0x26d/0x4b0 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 amdgpu_drm_ioctl+0x4e/0x90 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] __x64_sys_ioctl+0x94/0xd0 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6c/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f4dad17f76f Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c> RSP: 002b:00007ffd9ae859f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 000055e255a55900 RCX: 00007f4dad17f76f RDX: 00007ffd9ae85a90 RSI: 00000000c03864bc RDI: 000000000000000b RBP: 00007ffd9ae85a90 R08: 0000000000000003 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c03864bc R13: 000000000000000b R14: 000055e255a7fc60 R15: 000055e255a01eb0 Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device ccm cmac algif_hash algif_skcipher af_alg joydev mousedev bnep > typec libphy k10temp ipmi_msghandler roles i2c_scmi acpi_cpufreq mac_hid nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_mas> CR2: 0000000000000008 ---[ end trace 0000000000000000 ]--- RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 With a second DP monitor connected, drm_atomic_state in dm atomic check sequence does not include the connector state for the old/existing/first DP monitor. In such case, dsc determination policy would hit a null ptr when it tries to iterate the old/existing stream that does not have a valid connector state attached to it. When that happens, dm atomic check should call drm_atomic_get_connector_state for a new connector state. Existing dm has already done that, except for RV due to it does not have official support of dsc where .num_dsc is not defined in dcn10 resource cap, that prevent from getting drm_atomic_get_connector_state called. So, skip dsc determination policy for ASICs that don't have DSC support. Cc: stable@vger.kernel.org # 6.1+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314 Reviewed-by: Wayne Lin Acked-by: Hamza Mahfooz Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bea49befdcacc..a6c6f286a5988 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10123,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); - if (ret) { - DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; - goto fail; + if (dc_resource_is_dsc_encoding_supported(dc)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; + goto fail; + } } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); -- GitLab From eba76e4808c9ad2c41ec5652a9335a8b5c03a709 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 19:25:54 +0200 Subject: [PATCH 0736/2290] getrusage: add the "signal_struct *sig" local variable [ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ] No functional changes, cleanup/preparation. Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()") Signed-off-by: Sasha Levin --- kernel/sys.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index c85e1abf7b7c7..177155ba50cd3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1779,6 +1779,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long flags; u64 tgutime, tgstime, utime, stime; unsigned long maxrss = 0; + struct signal_struct *sig = p->signal; memset((char *)r, 0, sizeof (*r)); utime = stime = 0; @@ -1786,7 +1787,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); - maxrss = p->signal->maxrss; + maxrss = sig->maxrss; goto out; } @@ -1796,15 +1797,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - r->ru_inblock = p->signal->cinblock; - r->ru_oublock = p->signal->coublock; - maxrss = p->signal->cmaxrss; + utime = sig->cutime; + stime = sig->cstime; + r->ru_nvcsw = sig->cnvcsw; + r->ru_nivcsw = sig->cnivcsw; + r->ru_minflt = sig->cmin_flt; + r->ru_majflt = sig->cmaj_flt; + r->ru_inblock = sig->cinblock; + r->ru_oublock = sig->coublock; + maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; @@ -1814,14 +1815,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) thread_group_cputime_adjusted(p, &tgutime, &tgstime); utime += tgutime; stime += tgstime; - r->ru_nvcsw += p->signal->nvcsw; - r->ru_nivcsw += p->signal->nivcsw; - r->ru_minflt += p->signal->min_flt; - r->ru_majflt += p->signal->maj_flt; - r->ru_inblock += p->signal->inblock; - r->ru_oublock += p->signal->oublock; - if (maxrss < p->signal->maxrss) - maxrss = p->signal->maxrss; + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; + r->ru_majflt += sig->maj_flt; + r->ru_inblock += sig->inblock; + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; t = p; do { accumulate_thread_rusage(t, r); -- GitLab From d9fe6ef245766d4f4d0494aafc7d5b2189b9b94c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:50 +0100 Subject: [PATCH 0737/2290] getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand() [ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ] Patch series "getrusage: use sig->stats_lock", v2. This patch (of 2): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. This is also preparation for the next patch which changes getrusage() to use stats_lock instead of siglock, thread_group_cputime() takes the same lock. With the current implementation recursive read_seqbegin_or_lock() is fine, thread_group_cputime() can't enter the slow mode if the caller holds stats_lock, yet this looks more safe and better performance-wise. Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sys.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 177155ba50cd3..2646047fe5513 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1778,17 +1778,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; - unsigned long maxrss = 0; + unsigned long maxrss; + struct mm_struct *mm; struct signal_struct *sig = p->signal; - memset((char *)r, 0, sizeof (*r)); + memset(r, 0, sizeof(*r)); utime = stime = 0; + maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = sig->maxrss; - goto out; + goto out_thread; } if (!lock_task_sighand(p, &flags)) @@ -1812,9 +1814,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) fallthrough; case RUSAGE_SELF: - thread_group_cputime_adjusted(p, &tgutime, &tgstime); - utime += tgutime; - stime += tgstime; r->ru_nvcsw += sig->nvcsw; r->ru_nivcsw += sig->nivcsw; r->ru_minflt += sig->min_flt; @@ -1834,19 +1833,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) } unlock_task_sighand(p, &flags); -out: - r->ru_utime = ns_to_kernel_old_timeval(utime); - r->ru_stime = ns_to_kernel_old_timeval(stime); + if (who == RUSAGE_CHILDREN) + goto out_children; - if (who != RUSAGE_CHILDREN) { - struct mm_struct *mm = get_task_mm(p); + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; - if (mm) { - setmax_mm_hiwater_rss(&maxrss, mm); - mmput(mm); - } +out_thread: + mm = get_task_mm(p); + if (mm) { + setmax_mm_hiwater_rss(&maxrss, mm); + mmput(mm); } + +out_children: r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ + r->ru_utime = ns_to_kernel_old_timeval(utime); + r->ru_stime = ns_to_kernel_old_timeval(stime); } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) -- GitLab From 2a304d8c922f2d13d6b95457022950350c23103b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 19:26:29 +0200 Subject: [PATCH 0738/2290] getrusage: use __for_each_thread() [ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ] do/while_each_thread should be avoided when possible. Plus this change allows to avoid lock_task_sighand(), we can use rcu and/or sig->stats_lock instead. Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()") Signed-off-by: Sasha Levin --- kernel/sys.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 2646047fe5513..04102538cf43f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1822,10 +1822,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; - t = p; - do { + __for_each_thread(sig, t) accumulate_thread_rusage(t, r); - } while_each_thread(p, t); break; default: -- GitLab From 9793a3bb531c5b747bb726d7611b81abe32f6401 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:53 +0100 Subject: [PATCH 0739/2290] getrusage: use sig->stats_lock rather than lock_task_sighand() [ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ] lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call getrusage() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change getrusage() to use sig->stats_lock, it was specifically designed for this type of use. This way it runs lockless in the likely case. TODO: - Change do_task_stat() to use sig->stats_lock too, then we can remove spin_lock_irq(siglock) in wait_task_zombie(). - Turn sig->stats_lock into seqcount_rwlock_t, this way the readers in the slow mode won't exclude each other. See https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/ - stats_lock has to disable irqs because ->siglock can be taken in irq context, it would be very nice to change __exit_signal() to avoid the siglock->stats_lock dependency. Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sys.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 04102538cf43f..d06eda1387b69 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1781,7 +1781,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long maxrss; struct mm_struct *mm; struct signal_struct *sig = p->signal; + unsigned int seq = 0; +retry: memset(r, 0, sizeof(*r)); utime = stime = 0; maxrss = 0; @@ -1793,8 +1795,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) goto out_thread; } - if (!lock_task_sighand(p, &flags)) - return; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: @@ -1822,14 +1823,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; + + rcu_read_lock(); __for_each_thread(sig, t) accumulate_thread_rusage(t, r); + rcu_read_unlock(); + break; default: BUG(); } - unlock_task_sighand(p, &flags); + + if (need_seqretry(&sig->stats_lock, seq)) { + seq = 1; + goto retry; + } + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); if (who == RUSAGE_CHILDREN) goto out_children; -- GitLab From d95ef75162f4722af4b74d847173747930962405 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 18:45:01 +0200 Subject: [PATCH 0740/2290] fs/proc: do_task_stat: use __for_each_thread() [ Upstream commit 7904e53ed5a20fc678c01d5d1b07ec486425bb6a ] do/while_each_thread should be avoided when possible. Link: https://lkml.kernel.org/r/20230909164501.GA11581@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: 7601df8031fd ("fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats") Signed-off-by: Sasha Levin --- fs/proc/array.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 1b0d78dfd20f9..bcb645627991e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -526,12 +526,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t = task; - do { + struct task_struct *t; + + __for_each_thread(sig, t) { min_flt += t->min_flt; maj_flt += t->maj_flt; gtime += task_gtime(t); - } while_each_thread(task, t); + } min_flt += sig->min_flt; maj_flt += sig->maj_flt; -- GitLab From cf4b8c39b9a0bd81c47afc7ef62914a62dd5ec4d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:33:57 +0100 Subject: [PATCH 0741/2290] fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats [ Upstream commit 7601df8031fd67310af891897ef6cc0df4209305 ] lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call do_task_stat() at the same time and the process has NR_THREADS, it will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change do_task_stat() to use sig->stats_lock to gather the statistics outside of ->siglock protected section, in the likely case this code will run lockless. Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/proc/array.c | 58 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index bcb645627991e..d210b2f8b7ed5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, int permitted; struct mm_struct *mm; unsigned long long start_time; - unsigned long cmin_flt = 0, cmaj_flt = 0; - unsigned long min_flt = 0, maj_flt = 0; - u64 cutime, cstime, utime, stime; - u64 cgtime, gtime; + unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; + u64 cutime, cstime, cgtime, utime, stime, gtime; unsigned long rsslim = 0; unsigned long flags; int exit_code = task->exit_code; + struct signal_struct *sig = task->signal; + unsigned int seq = 1; state = *get_task_state(task); vsize = eip = esp = 0; @@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = 0; - cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; - if (sig->tty) { struct pid *pgrp = tty_get_pgrp(sig->tty); tty_pgrp = pid_nr_ns(pgrp, ns); @@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, num_threads = get_nr_threads(task); collect_sigign_sigcatch(task, &sigign, &sigcatch); - cmin_flt = sig->cmin_flt; - cmaj_flt = sig->cmaj_flt; - cutime = sig->cutime; - cstime = sig->cstime; - cgtime = sig->cgtime; rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); - /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t; - - __for_each_thread(sig, t) { - min_flt += t->min_flt; - maj_flt += t->maj_flt; - gtime += task_gtime(t); - } - - min_flt += sig->min_flt; - maj_flt += sig->maj_flt; - gtime += sig->gtime; - if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) exit_code = sig->group_exit_code; } @@ -552,6 +530,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); + do { + seq++; /* 2 on the 1st/lockless path, otherwise odd */ + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; + cgtime = sig->cgtime; + + if (whole) { + struct task_struct *t; + + min_flt = sig->min_flt; + maj_flt = sig->maj_flt; + gtime = sig->gtime; + + rcu_read_lock(); + __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); + } + rcu_read_unlock(); + } + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + if (whole) { thread_group_cputime_adjusted(task, &utime, &stime); } else { -- GitLab From d7543167affd372819a94879b8b1e8b9b12547d9 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 13 Mar 2024 07:42:17 -0400 Subject: [PATCH 0742/2290] Linux 6.1.82 Tested-by: Linux Kernel Functional Testing Tested-by: Florian Fainelli Tested-by: Mark Brown Tested-by: Pavel Machek (CIP) Tested-by: kernelci.org bot Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e13df565a1cb6..c5345f3ebed0d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 7512a70376f584589ada6363a8918fadd90189ed Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 12 May 2023 09:56:07 +0800 Subject: [PATCH 0743/2290] md: fix data corruption for raid456 when reshape restart while grow up [ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ] Currently, if reshape is interrupted, echo "reshape" to sync_action will restart reshape from scratch, for example: echo frozen > sync_action echo reshape > sync_action This will corrupt data before reshape_position if the array is growing, fix the problem by continue reshape from reshape_position. Reported-by: Peter Neuwirth Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 846bdee4daa0e..1c87f3e708094 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4903,11 +4903,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) return -EINVAL; err = mddev_lock(mddev); if (!err) { - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { err = -EBUSY; - else { + } else if (mddev->reshape_position == MaxSector || + mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev)) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); + } else { + /* + * If reshape is still in progress, and + * md_check_recovery() can continue to reshape, + * don't restart reshape because data can be + * corrupted for raid456. + */ + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } mddev_unlock(mddev); } -- GitLab From 1d467e10507167eb6dc2c281a87675b731955d86 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 29 May 2023 21:11:00 +0800 Subject: [PATCH 0744/2290] md/raid10: prevent soft lockup while flush writes [ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ] Currently, there is no limit for raid1/raid10 plugged bio. While flushing writes, raid1 has cond_resched() while raid10 doesn't, and too many writes can cause soft lockup. Follow up soft lockup can be triggered easily with writeback test for raid10 with ramdisks: watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293] Call Trace: call_rcu+0x16/0x20 put_object+0x41/0x80 __delete_object+0x50/0x90 delete_object_full+0x2b/0x40 kmemleak_free+0x46/0xa0 slab_free_freelist_hook.constprop.0+0xed/0x1a0 kmem_cache_free+0xfd/0x300 mempool_free_slab+0x1f/0x30 mempool_free+0x3a/0x100 bio_free+0x59/0x80 bio_put+0xcf/0x2c0 free_r10bio+0xbf/0xf0 raid_end_bio_io+0x78/0xb0 one_write_done+0x8a/0xa0 raid10_end_write_request+0x1b4/0x430 bio_endio+0x175/0x320 brd_submit_bio+0x3b9/0x9b7 [brd] __submit_bio+0x69/0xe0 submit_bio_noacct_nocheck+0x1e6/0x5a0 submit_bio_noacct+0x38c/0x7e0 flush_pending_writes+0xf0/0x240 raid10d+0xac/0x1ed0 Fix the problem by adding cond_resched() to raid10 like what raid1 did. Note that unlimited plugged bio still need to be optimized, for example, in the case of lots of dirty pages writeback, this will take lots of memory and io will spend a long time in plug, hence io latency is bad. Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/raid10.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7b318e7e8d459..009f7ffe4e10c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -920,6 +920,7 @@ static void flush_pending_writes(struct r10conf *conf) raid1_submit_write(bio); bio = next; + cond_resched(); } blk_finish_plug(&plug); } else @@ -1130,6 +1131,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) raid1_submit_write(bio); bio = next; + cond_resched(); } kfree(plug); } -- GitLab From 28fe81bcd3ea932e280f04e087ff0c75a4995a46 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Mar 2024 18:10:12 -0600 Subject: [PATCH 0745/2290] io_uring/unix: drop usage of io_uring socket Commit a4104821ad651d8a0b374f0b2474c345bbb42f82 upstream. Since we no longer allow sending io_uring fds over SCM_RIGHTS, move to using io_is_uring_fops() to detect whether this is a io_uring fd or not. With that done, kill off io_uring_get_socket() as nobody calls it anymore. This is in preparation to yanking out the rest of the core related to unix gc with io_uring. Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/io_uring.h | 10 +++++----- io_uring/io_uring.c | 13 ------------- io_uring/io_uring.h | 1 - net/core/scm.c | 2 +- net/unix/scm.c | 4 +--- 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index a1484cdb3158e..a8f3058448eaa 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -42,11 +42,11 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)); -struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); +bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) { @@ -71,6 +71,10 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, { return -EOPNOTSUPP; } +static inline bool io_is_uring_fops(struct file *file) +{ + return false; +} static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t ret2, unsigned issue_flags) { @@ -79,10 +83,6 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} static inline void io_uring_task_cancel(void) { } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 35894955b4549..cf7dd62da0e37 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -153,19 +153,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx); static struct kmem_cache *req_cachep; -struct sock *io_uring_get_socket(struct file *file) -{ -#if defined(CONFIG_UNIX) - if (io_is_uring_fops(file)) { - struct io_ring_ctx *ctx = file->private_data; - - return ctx->ring_sock->sk; - } -#endif - return NULL; -} -EXPORT_SYMBOL(io_uring_get_socket); - static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs)) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 019600570ee49..59e6f755f12c6 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) } void __io_req_task_work_add(struct io_kiocb *req, bool allow_local); -bool io_is_uring_fops(struct file *file); bool io_alloc_async_data(struct io_kiocb *req); void io_req_task_queue(struct io_kiocb *req); void io_queue_iowq(struct io_kiocb *req, bool *dont_use); diff --git a/net/core/scm.c b/net/core/scm.c index e762a4b8a1d22..a877c4ef4c256 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; /* don't allow io_uring files */ - if (io_uring_get_socket(file)) { + if (io_is_uring_fops(file)) { fput(file); return -EINVAL; } diff --git a/net/unix/scm.c b/net/unix/scm.c index e8e2a00bb0f58..d1048b4c2baaf 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp) /* PF_UNIX ? */ if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; - } else { - /* Could be an io_uring instance */ - u_sock = io_uring_get_socket(filp); } + return u_sock; } EXPORT_SYMBOL(unix_get_socket); -- GitLab From a3812a47a32022ca76bf46ddacdd823dc2aabf8b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Mar 2024 18:15:05 -0600 Subject: [PATCH 0746/2290] io_uring: drop any code related to SCM_RIGHTS Commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream. This is dead code after we dropped support for passing io_uring fds over SCM_RIGHTS, get rid of it. Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/io_uring_types.h | 3 - io_uring/filetable.c | 10 +-- io_uring/io_uring.c | 31 +------ io_uring/rsrc.c | 151 +-------------------------------- io_uring/rsrc.h | 15 ---- 5 files changed, 8 insertions(+), 202 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f5b687a787a34..37aeea266ebb3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -330,9 +330,6 @@ struct io_ring_ctx { struct list_head io_buffers_pages; - #if defined(CONFIG_UNIX) - struct socket *ring_sock; - #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; diff --git a/io_uring/filetable.c b/io_uring/filetable.c index b80614e7d6051..4660cb89ea9f5 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, needs_switch = true; } - ret = io_scm_file_account(ctx, file); - if (!ret) { - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - } + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + return 0; err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cf7dd62da0e37..415248c1f82c6 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -2628,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - ctx->ring_sock->file = NULL; /* so that iput() is called */ - sock_release(ctx->ring_sock); - } -#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { @@ -3438,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, - * we have to tie this fd to a socket for file garbage collection purposes. + * fd to gain access to the SQ/CQ ring details. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - struct file *file; -#if defined(CONFIG_UNIX) - int ret; - - ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, - &ctx->ring_sock); - if (ret) - return ERR_PTR(ret); -#endif - - file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); -#if defined(CONFIG_UNIX) - if (IS_ERR(file)) { - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; - } else { - ctx->ring_sock->file = file; - } -#endif - return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7ada0339b3870..ac658cfa89c63 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - err = io_scm_file_account(ctx, file); - if (err) { - fput(file); - break; - } *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); @@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < ctx->nr_user_files; i++) { struct file *file = io_file_from_index(&ctx->file_table, i); - /* skip scm accounted files, they'll be freed by ->ring_sock */ - if (!file || io_file_need_scm(file)) + if (!file) continue; io_file_bitmap_clear(&ctx->file_table, i); fput(file); } -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) - kfree_skb(skb); - } -#endif io_free_file_tables(&ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); @@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) return ret; } -/* - * Ensure the UNIX gc is aware of our file set, so we are certain that - * the io_uring can be safely unregistered on process exit, even if we have - * loops in the file referencing. We account only files that can hold other - * files because otherwise they can't form a loop and so are not interesting - * for GC. - */ -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) -{ -#if defined(CONFIG_UNIX) - struct sock *sk = ctx->ring_sock->sk; - struct sk_buff_head *head = &sk->sk_receive_queue; - struct scm_fp_list *fpl; - struct sk_buff *skb; - - if (likely(!io_file_need_scm(file))) - return 0; - - /* - * See if we can merge this file into an existing skb SCM_RIGHTS - * file set. If there's no room, fall back to allocating a new skb - * and filling it in. - */ - spin_lock_irq(&head->lock); - skb = skb_peek(head); - if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) - __skb_unlink(skb, head); - else - skb = NULL; - spin_unlock_irq(&head->lock); - - if (!skb) { - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); - if (!fpl) - return -ENOMEM; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) { - kfree(fpl); - return -ENOMEM; - } - - fpl->user = get_uid(current_user()); - fpl->max = SCM_MAX_FD; - fpl->count = 0; - - UNIXCB(skb).fp = fpl; - skb->sk = sk; - skb->scm_io_uring = 1; - skb->destructor = unix_destruct_scm; - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - } - - fpl = UNIXCB(skb).fp; - fpl->fp[fpl->count++] = get_file(file); - unix_inflight(fpl->user, file); - skb_queue_head(head, skb); - fput(file); -#endif - return 0; -} - static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { struct file *file = prsrc->file; -#if defined(CONFIG_UNIX) - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff_head list, *head = &sock->sk_receive_queue; - struct sk_buff *skb; - int i; - - if (!io_file_need_scm(file)) { - fput(file); - return; - } - - __skb_queue_head_init(&list); - - /* - * Find the skb that holds this file in its SCM_RIGHTS. When found, - * remove this entry and rearrange the file array. - */ - skb = skb_dequeue(head); - while (skb) { - struct scm_fp_list *fp; - fp = UNIXCB(skb).fp; - for (i = 0; i < fp->count; i++) { - int left; - - if (fp->fp[i] != file) - continue; - - unix_notinflight(fp->user, fp->fp[i]); - left = fp->count - 1 - i; - if (left) { - memmove(&fp->fp[i], &fp->fp[i + 1], - left * sizeof(struct file *)); - } - fp->count--; - if (!fp->count) { - kfree_skb(skb); - skb = NULL; - } else { - __skb_queue_tail(&list, skb); - } - fput(file); - file = NULL; - break; - } - - if (!file) - break; - - __skb_queue_tail(&list, skb); - - skb = skb_dequeue(head); - } - - if (skb_peek(&list)) { - spin_lock_irq(&head->lock); - while ((skb = __skb_dequeue(&list)) != NULL) - __skb_queue_tail(head, skb); - spin_unlock_irq(&head->lock); - } -#else fput(file); -#endif } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; /* - * Don't allow io_uring instances to be registered. If UNIX - * isn't enabled, then this causes a reference cycle and this - * instance can never get freed. If UNIX is enabled we'll - * handle it just fine, but there's still no point in allowing - * a ring fd as it doesn't support regular read/write anyway. + * Don't allow io_uring instances to be registered. */ if (io_is_uring_fops(file)) { fput(file); goto fail; } - ret = io_scm_file_account(ctx, file); - if (ret) { - fput(file); - goto fail; - } file_slot = io_fixed_file_slot(&ctx->file_table, i); io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index acaf8dad05401..85f145607c620 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags); -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); - -static inline bool io_file_need_scm(struct file *filp) -{ - return false; -} - -static inline int io_scm_file_account(struct io_ring_ctx *ctx, - struct file *file) -{ - if (likely(!io_file_need_scm(file))) - return 0; - return __io_scm_file_account(ctx, file); -} - int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args); int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, -- GitLab From 7762c2d4cc0b6a1c4682e7fd01f0586f028aeba4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jan 2023 10:33:47 -0500 Subject: [PATCH 0747/2290] nfsd: allow nfsd_file_get to sanely handle a NULL pointer [ Upstream commit 70f62231cdfd52357836733dd31db787e0412ab2 ] ...and remove some now-useless NULL pointer checks in its callers. Suggested-by: NeilBrown Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 5 ++--- fs/nfsd/nfs4state.c | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 697acf5c3c681..6e8712bd7c998 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -431,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf) struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(refcount_inc_not_zero(&nf->nf_ref))) + if (nf && refcount_inc_not_zero(&nf->nf_ref)) return nf; return NULL; } @@ -1086,8 +1086,7 @@ retry: rcu_read_lock(); nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); - if (nf) - nf = nfsd_file_get(nf); + nf = nfsd_file_get(nf); rcu_read_unlock(); if (nf) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b9d694ec25d19..e4522e86e984e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi) static struct nfsd_file * __nfs4_get_fd(struct nfs4_file *f, int oflag) { - if (f->fi_fds[oflag]) - return nfsd_file_get(f->fi_fds[oflag]); - return NULL; + return nfsd_file_get(f->fi_fds[oflag]); } static struct nfsd_file * -- GitLab From 19d22c5ba5c9c7a7132e2d75d6fc0b5afaa6dd1e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 07:15:09 -0500 Subject: [PATCH 0748/2290] nfsd: don't open-code clear_and_wake_up_bit [ Upstream commit b8bea9f6cdd7236c7c2238d022145e9b2f8aac22 ] Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6e8712bd7c998..5b5d39ec7b010 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1174,9 +1174,7 @@ open_file: status = nfserr_jukebox; if (status != nfs_ok) nfsd_file_unhash(nf); - clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); - smp_mb__after_atomic(); - wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); goto out; } -- GitLab From fab03e0db0c2e8bcb651ea42634f5449cedc705f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jan 2023 10:39:00 -0500 Subject: [PATCH 0749/2290] nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries [ Upstream commit 6c31e4c98853a4ba47355ea151b36a77c42b7734 ] Since v4 files are expected to be long-lived, there's little value in closing them out of the cache when there is conflicting access. Change the comparator to also match the gc value in the key. Change both of the current users of that key to set the gc value in the key to "true". Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 5b5d39ec7b010..c36e3032d4386 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -175,6 +175,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, switch (key->type) { case NFSD_FILE_KEY_INODE: + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + return 1; if (nf->nf_inode != key->inode) return 1; break; @@ -695,6 +697,7 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, + .gc = true, }; struct nfsd_file *nf; @@ -1049,6 +1052,7 @@ nfsd_file_is_cached(struct inode *inode) struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, + .gc = true, }; bool ret = false; -- GitLab From 8a6c19f15766756acfad4f75c9d07fad0479a362 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jan 2023 10:39:01 -0500 Subject: [PATCH 0750/2290] nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator [ Upstream commit d69b8dbfd0866abc5ec84652cc1c10fc3d4d91ef ] test_bit returns bool, so we can just compare the result of that to the key->gc value without the "!!". Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c36e3032d4386..568963b8a4777 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -189,7 +189,7 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, return 1; if (!nfsd_match_cred(nf->nf_cred, key->cred)) return 1; - if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) return 1; if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) return 1; -- GitLab From ee84c44b4a0a320f6c733cd3b33d4d43db7a35fc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 07:15:11 -0500 Subject: [PATCH 0751/2290] nfsd: don't kill nfsd_files because of lease break error [ Upstream commit c6593366c0bf222be9c7561354dfb921c611745e ] An error from break_lease is non-fatal, so we needn't destroy the nfsd_file in that case. Just put the reference like we normally would and return the error. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 568963b8a4777..ab37b85b72077 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1102,7 +1102,7 @@ retry: nf = nfsd_file_alloc(&key, may_flags); if (!nf) { status = nfserr_jukebox; - goto out_status; + goto out; } ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, @@ -1111,13 +1111,11 @@ retry: if (likely(ret == 0)) goto open_file; - nfsd_file_slab_free(&nf->nf_rcu); - nf = NULL; if (ret == -EEXIST) goto retry; trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); status = nfserr_jukebox; - goto out_status; + goto construction_err; wait_for_construction: wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); @@ -1127,29 +1125,25 @@ wait_for_construction: trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); if (!open_retry) { status = nfserr_jukebox; - goto out; + goto construction_err; } open_retry = false; - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); goto retry; } - this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); + if (status != nfs_ok) { + nfsd_file_put(nf); + nf = NULL; + } + out: if (status == nfs_ok) { this_cpu_inc(nfsd_file_acquisitions); nfsd_file_check_write_error(nf); *pnf = nf; - } else { - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); - nf = NULL; } - -out_status: put_cred(key.cred); trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; @@ -1179,6 +1173,13 @@ open_file: if (status != nfs_ok) nfsd_file_unhash(nf); clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (status == nfs_ok) + goto out; + +construction_err: + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); + nf = NULL; goto out; } -- GitLab From 917dadb09e3114b9db2e22b84c7d84de937b3b33 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 07:15:12 -0500 Subject: [PATCH 0752/2290] nfsd: add some comments to nfsd_file_do_acquire [ Upstream commit b680cb9b737331aad271feebbedafb865504e234 ] David Howells mentioned that he found this bit of code confusing, so sprinkle in some comments to clarify. Reported-by: David Howells Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ab37b85b72077..50349449a4e52 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1094,6 +1094,11 @@ retry: rcu_read_unlock(); if (nf) { + /* + * If the nf is on the LRU then it holds an extra reference + * that must be put if it's removed. It had better not be + * the last one however, since we should hold another. + */ if (nfsd_file_lru_remove(nf)) WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; -- GitLab From c01b3f0fef71cdf1bedbd34b30304eaafe2dd97a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jan 2023 12:31:37 -0500 Subject: [PATCH 0753/2290] nfsd: don't take/put an extra reference when putting a file [ Upstream commit b2ff1bd71db2a1b193a6dde0845adcd69cbcf75e ] The last thing that filp_close does is an fput, so don't bother taking and putting the extra reference. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 50349449a4e52..51e2947c21a7d 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -382,10 +382,8 @@ nfsd_file_free(struct nfsd_file *nf) if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { - get_file(nf->nf_file); - filp_close(nf->nf_file, NULL); nfsd_file_check_write_error(nf); - fput(nf->nf_file); + filp_close(nf->nf_file, NULL); } /* -- GitLab From 7cc95476337220ebb5c108077c7294343b4f2e66 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 26 Jan 2023 12:21:16 -0500 Subject: [PATCH 0754/2290] nfsd: update comment over __nfsd_file_cache_purge [ Upstream commit 972cc0e0924598cb293b919d39c848dc038b2c28 ] Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 51e2947c21a7d..9b7082fdd2115 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -907,7 +907,8 @@ out_err: * @net: net-namespace to shut down the cache (may be NULL) * * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, - * then close out everything. Called when an nfsd instance is being shut down. + * then close out everything. Called when an nfsd instance is being shut down, + * and when the exports table is flushed. */ static void __nfsd_file_cache_purge(struct net *net) -- GitLab From f7ae480886873659b0ecff139a35adb173187b74 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 15 Feb 2023 06:53:54 -0500 Subject: [PATCH 0755/2290] nfsd: allow reaping files still under writeback [ Upstream commit dcb779fcd4ed5984ad15991d574943d12a8693d1 ] On most filesystems, there is no reason to delay reaping an nfsd_file just because its underlying inode is still under writeback. nfsd just relies on client activity or the local flusher threads to do writeback. The main exception is NFS, which flushes all of its dirty data on last close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to signal that they do this, and only skip closing files under writeback on such filesystems. Also, remove a redundant NULL file pointer check in nfsd_file_check_writeback, and clean up nfs's export op flag definitions. Signed-off-by: Jeff Layton Acked-by: Anna Schumaker [ cel: adjusted to apply to v6.1.y ] Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfs/export.c | 9 ++++++--- fs/nfsd/filecache.c | 12 +++++++++++- include/linux/exportfs.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 01596f2d0a1ed..9fe9586a51b71 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -156,7 +156,10 @@ const struct export_operations nfs_export_ops = { .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, .fetch_iversion = nfs_fetch_iversion, - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| - EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| - EXPORT_OP_NOATOMIC_ATTR, + .flags = EXPORT_OP_NOWCC | + EXPORT_OP_NOSUBTREECHK | + EXPORT_OP_CLOSE_BEFORE_UNLINK | + EXPORT_OP_REMOTE_FS | + EXPORT_OP_NOATOMIC_ATTR | + EXPORT_OP_FLUSH_ON_CLOSE, }; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9b7082fdd2115..a6fa6e9802772 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -402,13 +402,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) struct file *file = nf->nf_file; struct address_space *mapping; - if (!file || !(file->f_mode & FMODE_WRITE)) + /* File not open for write? */ + if (!(file->f_mode & FMODE_WRITE)) return false; + + /* + * Some filesystems (e.g. NFS) flush all dirty data on close. + * On others, there is no need to wait for writeback. + */ + if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) + return false; + mapping = file->f_mapping; return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } + static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3a..218fc5c54e901 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,6 +221,7 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ +#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ unsigned long flags; }; -- GitLab From 0af5ee518165fb227a5cf30a414d284de93614ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 24 Nov 2022 15:09:04 -0500 Subject: [PATCH 0756/2290] NFSD: Convert filecache to rhltable [ Upstream commit c4c649ab413ba6a785b25f0edbb12f617c87db2a ] While we were converting the nfs4_file hashtable to use the kernel's resizable hashtable data structure, Neil Brown observed that the list variant (rhltable) would be better for managing nfsd_file items as well. The nfsd_file hash table will contain multiple entries for the same inode -- these should be kept together on a list. And, it could be possible for exotic or malicious client behavior to cause the hash table to resize itself on every insertion. A nice simplification is that rhltable_lookup() can return a list that contains only nfsd_file items that match a given inode, which enables us to eliminate specialized hash table helper functions and use the default functions provided by the rhashtable implementation). Since we are now storing nfsd_file items for the same inode on a single list, that effectively reduces the number of hash entries that have to be tracked in the hash table. The mininum bucket count is therefore lowered. Light testing with fstests generic/531 show no regressions. Suggested-by: Neil Brown Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 311 ++++++++++++++++++-------------------------- fs/nfsd/filecache.h | 9 +- 2 files changed, 133 insertions(+), 187 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a6fa6e9802772..2f0b2d964cbb1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -74,70 +74,9 @@ static struct list_lru nfsd_file_lru; static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static struct delayed_work nfsd_filecache_laundrette; -static struct rhashtable nfsd_file_rhash_tbl +static struct rhltable nfsd_file_rhltable ____cacheline_aligned_in_smp; -enum nfsd_file_lookup_type { - NFSD_FILE_KEY_INODE, - NFSD_FILE_KEY_FULL, -}; - -struct nfsd_file_lookup_key { - struct inode *inode; - struct net *net; - const struct cred *cred; - unsigned char need; - bool gc; - enum nfsd_file_lookup_type type; -}; - -/* - * The returned hash value is based solely on the address of an in-code - * inode, a pointer to a slab-allocated object. The entropy in such a - * pointer is concentrated in its middle bits. - */ -static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) -{ - unsigned long ptr = (unsigned long)inode; - u32 k; - - k = ptr >> L1_CACHE_SHIFT; - k &= 0x00ffffff; - return jhash2(&k, 1, seed); -} - -/** - * nfsd_file_key_hashfn - Compute the hash value of a lookup key - * @data: key on which to compute the hash value - * @len: rhash table's key_len parameter (unused) - * @seed: rhash table's random seed of the day - * - * Return value: - * Computed 32-bit hash value - */ -static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nfsd_file_lookup_key *key = data; - - return nfsd_file_inode_hash(key->inode, seed); -} - -/** - * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file - * @data: object on which to compute the hash value - * @len: rhash table's key_len parameter (unused) - * @seed: rhash table's random seed of the day - * - * Return value: - * Computed 32-bit hash value - */ -static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nfsd_file *nf = data; - - return nfsd_file_inode_hash(nf->nf_inode, seed); -} - static bool nfsd_match_cred(const struct cred *c1, const struct cred *c2) { @@ -158,55 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2) return true; } -/** - * nfsd_file_obj_cmpfn - Match a cache item against search criteria - * @arg: search criteria - * @ptr: cache item to check - * - * Return values: - * %0 - Item matches search criteria - * %1 - Item does not match search criteria - */ -static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, - const void *ptr) -{ - const struct nfsd_file_lookup_key *key = arg->key; - const struct nfsd_file *nf = ptr; - - switch (key->type) { - case NFSD_FILE_KEY_INODE: - if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) - return 1; - if (nf->nf_inode != key->inode) - return 1; - break; - case NFSD_FILE_KEY_FULL: - if (nf->nf_inode != key->inode) - return 1; - if (nf->nf_may != key->need) - return 1; - if (nf->nf_net != key->net) - return 1; - if (!nfsd_match_cred(nf->nf_cred, key->cred)) - return 1; - if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) - return 1; - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) - return 1; - break; - } - return 0; -} - static const struct rhashtable_params nfsd_file_rhash_params = { .key_len = sizeof_field(struct nfsd_file, nf_inode), .key_offset = offsetof(struct nfsd_file, nf_inode), - .head_offset = offsetof(struct nfsd_file, nf_rhash), - .hashfn = nfsd_file_key_hashfn, - .obj_hashfn = nfsd_file_obj_hashfn, - .obj_cmpfn = nfsd_file_obj_cmpfn, - /* Reduce resizing churn on light workloads */ - .min_size = 512, /* buckets */ + .head_offset = offsetof(struct nfsd_file, nf_rlist), + + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, .automatic_shrinking = true, }; @@ -309,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) } static struct nfsd_file * -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) +nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, + bool want_gc) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); - if (nf) { - INIT_LIST_HEAD(&nf->nf_lru); - nf->nf_birthtime = ktime_get(); - nf->nf_file = NULL; - nf->nf_cred = get_current_cred(); - nf->nf_net = key->net; - nf->nf_flags = 0; - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - if (key->gc) - __set_bit(NFSD_FILE_GC, &nf->nf_flags); - nf->nf_inode = key->inode; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = key->need; - nf->nf_mark = NULL; - } + if (unlikely(!nf)) + return NULL; + + INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); + nf->nf_file = NULL; + nf->nf_cred = get_current_cred(); + nf->nf_net = net; + nf->nf_flags = want_gc ? + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); + nf->nf_inode = inode; + refcount_set(&nf->nf_ref, 1); + nf->nf_may = need; + nf->nf_mark = NULL; return nf; } @@ -354,8 +254,8 @@ static void nfsd_file_hash_remove(struct nfsd_file *nf) { trace_nfsd_file_unhash(nf); - rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, - nfsd_file_rhash_params); + rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); } static bool @@ -688,8 +588,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) * @inode: inode on which to close out nfsd_files * @dispose: list on which to gather nfsd_files to close out * - * An nfsd_file represents a struct file being held open on behalf of nfsd. An - * open file however can block other activity (such as leases), or cause + * An nfsd_file represents a struct file being held open on behalf of nfsd. + * An open file however can block other activity (such as leases), or cause * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). * * This function is intended to find open nfsd_files when this sort of @@ -702,21 +602,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) static void nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_INODE, - .inode = inode, - .gc = true, - }; + struct rhlist_head *tmp, *list; struct nfsd_file *nf; rcu_read_lock(); - do { - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params); - if (!nf) - break; + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) + continue; nfsd_file_cond_queue(nf, dispose); - } while (1); + } rcu_read_unlock(); } @@ -840,7 +736,7 @@ nfsd_file_cache_init(void) if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; - ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); + ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); if (ret) return ret; @@ -908,7 +804,7 @@ out_err: nfsd_file_mark_slab = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhashtable_destroy(&nfsd_file_rhash_tbl); + rhltable_destroy(&nfsd_file_rhltable); goto out; } @@ -927,7 +823,7 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); - rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -1033,7 +929,7 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhashtable_destroy(&nfsd_file_rhash_tbl); + rhltable_destroy(&nfsd_file_rhltable); for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; @@ -1044,6 +940,35 @@ nfsd_file_cache_shutdown(void) } } +static struct nfsd_file * +nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, + struct inode *inode, unsigned char need, + bool want_gc) +{ + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; + + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (nf->nf_may != need) + continue; + if (nf->nf_net != net) + continue; + if (!nfsd_match_cred(nf->nf_cred, cred)) + continue; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) + continue; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + continue; + + if (!nfsd_file_get(nf)) + continue; + return nf; + } + return NULL; +} + /** * nfsd_file_is_cached - are there any cached open files for this inode? * @inode: inode to check @@ -1058,16 +983,20 @@ nfsd_file_cache_shutdown(void) bool nfsd_file_is_cached(struct inode *inode) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_INODE, - .inode = inode, - .gc = true, - }; + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; bool ret = false; - if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params) != NULL) - ret = true; + rcu_read_lock(); + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { + ret = true; + break; + } + rcu_read_unlock(); + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -1077,14 +1006,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf, bool want_gc) { - struct nfsd_file_lookup_key key = { - .type = NFSD_FILE_KEY_FULL, - .need = may_flags & NFSD_FILE_MAY_MASK, - .net = SVC_NET(rqstp), - .gc = want_gc, - }; + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; + struct net *net = SVC_NET(rqstp); + struct nfsd_file *new, *nf; + const struct cred *cred; bool open_retry = true; - struct nfsd_file *nf; + struct inode *inode; __be32 status; int ret; @@ -1092,14 +1019,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; - key.inode = d_inode(fhp->fh_dentry); - key.cred = get_current_cred(); + inode = d_inode(fhp->fh_dentry); + cred = get_current_cred(); retry: rcu_read_lock(); - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, - nfsd_file_rhash_params); - nf = nfsd_file_get(nf); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); rcu_read_unlock(); if (nf) { @@ -1113,21 +1038,32 @@ retry: goto wait_for_construction; } - nf = nfsd_file_alloc(&key, may_flags); - if (!nf) { + new = nfsd_file_alloc(net, inode, need, want_gc); + if (!new) { status = nfserr_jukebox; goto out; } - ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, - &key, &nf->nf_rhash, - nfsd_file_rhash_params); + rcu_read_lock(); + spin_lock(&inode->i_lock); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + if (unlikely(nf)) { + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + nfsd_file_slab_free(&new->nf_rcu); + goto wait_for_construction; + } + nf = new; + ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); if (likely(ret == 0)) goto open_file; if (ret == -EEXIST) goto retry; - trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); + trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); status = nfserr_jukebox; goto construction_err; @@ -1136,7 +1072,7 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); + trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); if (!open_retry) { status = nfserr_jukebox; goto construction_err; @@ -1158,13 +1094,13 @@ out: nfsd_file_check_write_error(nf); *pnf = nf; } - put_cred(key.cred); - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + put_cred(cred); + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); if (nf->nf_mark) { if (file) { get_file(file); @@ -1182,7 +1118,7 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status == nfs_ok && key.inode->i_nlink == 0) + if (status != nfs_ok || inode->i_nlink == 0) status = nfserr_jukebox; if (status != nfs_ok) nfsd_file_unhash(nf); @@ -1209,8 +1145,11 @@ construction_err: * seconds after the final nfsd_file_put() in case the caller * wants to re-use it. * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1230,8 +1169,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, * but not garbage-collected. The object is unhashed after the * final nfsd_file_put(). * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1252,8 +1194,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * and @file is non-NULL, use it to instantiate a new nfsd_file instead of * opening a new one. * - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in - * network byte order is returned. + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. */ __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -1284,7 +1229,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) lru = list_lru_count(&nfsd_file_lru); rcu_read_lock(); - ht = &nfsd_file_rhash_tbl; + ht = &nfsd_file_rhltable.ht; count = atomic_read(&ht->nelems); tbl = rht_dereference_rcu(ht->tbl, ht); buckets = tbl->size; @@ -1300,7 +1245,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) evictions += per_cpu(nfsd_file_evictions, i); } - seq_printf(m, "total entries: %u\n", count); + seq_printf(m, "total inodes: %u\n", count); seq_printf(m, "hash buckets: %u\n", buckets); seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 41516a4263ea5..e54165a3224f0 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,9 +29,8 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { - struct rhash_head nf_rhash; - struct list_head nf_lru; - struct rcu_head nf_rcu; + struct rhlist_head nf_rlist; + void *nf_inode; struct file *nf_file; const struct cred *nf_cred; struct net *nf_net; @@ -40,10 +39,12 @@ struct nfsd_file { #define NFSD_FILE_REFERENCED (2) #define NFSD_FILE_GC (3) unsigned long nf_flags; - struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; unsigned char nf_may; + struct nfsd_file_mark *nf_mark; + struct list_head nf_lru; + struct rcu_head nf_rcu; ktime_t nf_birthtime; }; -- GitLab From 448f1dcd6240e3ab4c78d4052167fd33e7c824c7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Apr 2023 17:31:44 -0400 Subject: [PATCH 0757/2290] nfsd: simplify the delayed disposal list code [ Upstream commit 92e4a6733f922f0fef1d0995f7b2d0eaff86c7ea ] When queueing a dispose list to the appropriate "freeme" lists, it pointlessly queues the objects one at a time to an intermediate list. Remove a few helpers and just open code a list_move to make it more clear and efficient. Better document the resulting functions with kerneldoc comments. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 64 ++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 2f0b2d964cbb1..f40d8f3b35a4c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -402,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose) } } -static void -nfsd_file_list_remove_disposal(struct list_head *dst, - struct nfsd_fcache_disposal *l) -{ - spin_lock(&l->lock); - list_splice_init(&l->freeme, dst); - spin_unlock(&l->lock); -} - -static void -nfsd_file_list_add_disposal(struct list_head *files, struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd_fcache_disposal *l = nn->fcache_disposal; - - spin_lock(&l->lock); - list_splice_tail_init(files, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); -} - -static void -nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, - struct net *net) -{ - struct nfsd_file *nf, *tmp; - - list_for_each_entry_safe(nf, tmp, src, nf_lru) { - if (nf->nf_net == net) - list_move_tail(&nf->nf_lru, dst); - } -} - +/** + * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list + * @dispose: list of nfsd_files to be disposed + * + * Transfers each file to the "freeme" list for its nfsd_net, to eventually + * be disposed of by the per-net garbage collector. + */ static void nfsd_file_dispose_list_delayed(struct list_head *dispose) { - LIST_HEAD(list); - struct nfsd_file *nf; - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); - nfsd_file_list_add_disposal(&list, nf->nf_net); + struct nfsd_file *nf = list_first_entry(dispose, + struct nfsd_file, nf_lru); + struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; + + spin_lock(&l->lock); + list_move_tail(&nf->nf_lru, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); } } @@ -665,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode) * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and destroy any entries that have not been used since - * the last scan. + * Scrape the freeme list for this nfsd_net, and then dispose of them + * all. */ static void nfsd_file_delayed_close(struct work_struct *work) @@ -675,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work) struct nfsd_fcache_disposal *l = container_of(work, struct nfsd_fcache_disposal, work); - nfsd_file_list_remove_disposal(&head, l); + spin_lock(&l->lock); + list_splice_init(&l->freeme, &head); + spin_unlock(&l->lock); + nfsd_file_dispose_list(&head); } -- GitLab From 37085bbd92b3e7c31309c85d1a8273aa0ed213f5 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 19 Apr 2023 10:53:18 -0700 Subject: [PATCH 0758/2290] NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop [ Upstream commit 147abcacee33781e75588869e944ddb07528a897 ] The following request sequence to the same file causes the NFS client and server getting into an infinite loop with COMMIT and NFS4ERR_DELAY: OPEN REMOVE WRITE COMMIT Problem reported by recall11, recall12, recall14, recall20, recall22, recall40, recall42, recall48, recall50 of nfstest suite. This patch restores the handling of race condition in nfsd_file_do_acquire with unlink to that prior of the regression. Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache") Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f40d8f3b35a4c..ee9c923192e08 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1099,8 +1099,6 @@ open_file: * then unhash. */ if (status != nfs_ok || inode->i_nlink == 0) - status = nfserr_jukebox; - if (status != nfs_ok) nfsd_file_unhash(nf); clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); if (status == nfs_ok) -- GitLab From 96e18f236178a5d444a15547413b4e7101da611a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:39 -0400 Subject: [PATCH 0759/2290] NFSD: Add an nfsd4_encode_nfstime4() helper [ Upstream commit 262176798b18b12fd8ab84c94cfece0a6a652476 ] Clean up: de-duplicate some common code. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 597f14a80512f..514f4456cf5c6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, + struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 3); + if (!p) + return nfserr_resource; + + p = xdr_encode_hyper(p, (s64)tv->tv_sec); + *p = cpu_to_be32(tv->tv_nsec); + return nfs_ok; +} + /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -3346,11 +3360,9 @@ out_acl: p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); - *p++ = cpu_to_be32(stat.atime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.atime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3359,25 +3371,19 @@ out_acl: p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); - *p++ = cpu_to_be32(stat.ctime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.ctime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); - *p++ = cpu_to_be32(stat.mtime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.mtime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); - *p++ = cpu_to_be32(stat.btime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; -- GitLab From 806a0a1819babb6defff385c8d74bf82e0604dec Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Fri, 23 Jun 2023 17:09:06 -0400 Subject: [PATCH 0760/2290] nfsd: Fix creation time serialization order In nfsd4_encode_fattr(), TIME_CREATE was being written out after all other times. However, they should be written out in an order that matches the bit flags in bmval1, which in this case are #define FATTR4_WORD1_TIME_ACCESS (1UL << 15) #define FATTR4_WORD1_TIME_CREATE (1UL << 18) #define FATTR4_WORD1_TIME_DELTA (1UL << 19) #define FATTR4_WORD1_TIME_METADATA (1UL << 20) #define FATTR4_WORD1_TIME_MODIFY (1UL << 21) so TIME_CREATE should come second. I noticed this on a FreeBSD NFSv4.2 client, which supports creation times. On this client, file times were weirdly permuted. With this patch applied on the server, times looked normal on the client. Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute") Link: https://unix.stackexchange.com/q/749605/56202 Signed-off-by: Tavian Barnes Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4xdr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 514f4456cf5c6..4ed9fef14adc2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3364,6 +3364,11 @@ out_acl: if (status) goto out; } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; + } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); if (!p) @@ -3380,11 +3385,6 @@ out_acl: if (status) goto out; } - if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - status = nfsd4_encode_nfstime4(xdr, &stat.btime); - if (status) - goto out; - } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; -- GitLab From abd34206f396d3ae50cddbd5aa840b8cd7f68c63 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 18 Dec 2023 08:54:01 +0100 Subject: [PATCH 0761/2290] media: rkisp1: Fix IRQ handling due to shared interrupts [ Upstream commit ffb635bb398fc07cb38f8a7b4a82cbe5f412f08e ] The driver requests the interrupts as IRQF_SHARED, so the interrupt handlers can be called at any time. If such a call happens while the ISP is powered down, the SoC will hang as the driver tries to access the ISP registers. This can be reproduced even without the platform sharing the IRQ line: Enable CONFIG_DEBUG_SHIRQ and unload the driver, and the board will hang. Fix this by adding a new field, 'irqs_enabled', which is used to bail out from the interrupt handler when the ISP is not operational. Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-2-173007628248@ideasonboard.com Signed-off-by: Tomi Valkeinen Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- .../platform/rockchip/rkisp1/rkisp1-capture.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-common.h | 2 ++ .../platform/rockchip/rkisp1/rkisp1-csi.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-dev.c | 22 +++++++++++++++++++ .../platform/rockchip/rkisp1/rkisp1-isp.c | 3 +++ 5 files changed, 33 insertions(+) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c index d4540684ea9af..0bcb9db5ad190 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c @@ -701,6 +701,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx) unsigned int i; u32 status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h index f9ec1c6138947..5776292f914a4 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h @@ -467,6 +467,7 @@ struct rkisp1_debug { * @debug: debug params to be exposed on debugfs * @info: version-specific ISP information * @irqs: IRQ line numbers + * @irqs_enabled: the hardware is enabled and can cause interrupts */ struct rkisp1_device { void __iomem *base_addr; @@ -488,6 +489,7 @@ struct rkisp1_device { struct rkisp1_debug debug; const struct rkisp1_info *info; int irqs[RKISP1_NUM_IRQS]; + bool irqs_enabled; }; /* diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c index e862f515cc6d3..95b6e41c48ec2 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c @@ -211,6 +211,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 val, status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index 41abb18b00acb..7a3b69ba51b97 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -305,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev) { struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); + rkisp1->irqs_enabled = false; + /* Make sure the IRQ handler will see the above */ + mb(); + + /* + * Wait until any running IRQ handler has returned. The IRQ handler + * may get called even after this (as it's a shared interrupt line) + * but the 'irqs_enabled' flag will make the handler return immediately. + */ + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (rkisp1->irqs[il] == -1) + continue; + + /* Skip if the irq line is the same as previous */ + if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il]) + synchronize_irq(rkisp1->irqs[il]); + } + clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks); return pinctrl_pm_select_sleep_state(dev); } @@ -321,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev) if (ret) return ret; + rkisp1->irqs_enabled = true; + /* Make sure the IRQ handler will see the above */ + mb(); + return 0; } diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c index 00dca284c1222..2af5c1a48070b 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c @@ -1023,6 +1023,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 status, isp_err; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS); if (!status) return IRQ_NONE; -- GitLab From d23425dab9975e3abff49fd416856c020dd90dd1 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Fri, 9 Feb 2024 17:11:09 +0000 Subject: [PATCH 0762/2290] perf/arm-cmn: Workaround AmpereOneX errata AC04_MESH_1 (incorrect child count) [ Upstream commit 50572064ec7109b00eef8880e905f55861c8b3de ] AmpereOneX mesh implementation has a bug in HN-P nodes that makes them report incorrect child count. The failing crosspoints report 8 children while they only have two. When the driver tries to access the inexistent child nodes, it believes it has reached an invalid node type and probing fails. The workaround is to ignore those incorrect child nodes and continue normally. Signed-off-by: Ilkka Koskinen [ rm: rewrote simpler generalised version ] Tested-by: Ilkka Koskinen Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ce4b1442135fe03d0de41859b04b268c88c854a3.1707498577.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 47e7c3206939f..899e4ed49905c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2178,6 +2178,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); continue; } + /* + * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus + * child count larger than the number of valid child pointers. + * A child offset of 0 can only occur on CMN-600; otherwise it + * would imply the root node being its own grandchild, which + * we can safely dismiss in general. + */ + if (reg == 0 && cmn->part != PART_CMN600) { + dev_dbg(cmn->dev, "bogus child pointer?\n"); + continue; + } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); -- GitLab From c4b96f7eaba0fbbf948b5b1c13eecd614b3582e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:23 -0800 Subject: [PATCH 0763/2290] selftests: tls: use exact comparison in recv_partial [ Upstream commit 49d821064c44cb5ffdf272905236012ea9ce50e3 ] This exact case was fail for async crypto and we weren't catching it. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5b80fb155d549..d89ee6e1926c7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -926,12 +926,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), strlen(test_str_first)); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), strlen(test_str_second)); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } -- GitLab From aa9e9c776442da3c79a08fa3f68512959ab5e990 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 11 Feb 2024 22:27:35 +0100 Subject: [PATCH 0764/2290] ASoC: rt5645: Make LattePanda board DMI match more precise [ Upstream commit 551539a8606e28cb2a130f8ef3e9834235b456c4 ] The DMI strings used for the LattePanda board DMI quirks are very generic. Using the dmidecode database from https://linux-hardware.org/ shows that the chosen DMI strings also match the following 2 laptops which also have a rt5645 codec: Insignia NS-P11W7100 https://linux-hardware.org/?computer=E092FFF8BA04 Insignia NS-P10W8100 https://linux-hardware.org/?computer=AFB6C0BF7934 All 4 hw revisions of the LattePanda board have "S70CR" in their BIOS version DMI strings: DF-BI-7-S70CR100-* DF-BI-7-S70CR110-* DF-BI-7-S70CR200-* LP-BS-7-S70CR700-* See e.g. https://linux-hardware.org/?computer=D98250A817C0 Add a partial (non exact) DMI match on this string to make the LattePanda board DMI match more precise to avoid false-positive matches. Signed-off-by: Hans de Goede Link: https://msgid.link/r/20240211212736.179605-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5645.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 844d14d4c9a51..aac9140749968 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3802,6 +3802,16 @@ static const struct dmi_system_id dmi_platform_data[] = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"), + /* + * Above strings are too generic, LattePanda BIOS versions for + * all 4 hw revisions are: + * DF-BI-7-S70CR100-* + * DF-BI-7-S70CR110-* + * DF-BI-7-S70CR200-* + * LP-BS-7-S70CR700-* + * Do a partial match for S70CR to avoid false positive matches. + */ + DMI_MATCH(DMI_BIOS_VERSION, "S70CR"), }, .driver_data = (void *)&lattepanda_board_platform_data, }, -- GitLab From 7056108e01779e9188f37c22968e131f188bc6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20T=C5=91k=C3=A9s?= Date: Sat, 10 Feb 2024 21:36:38 +0200 Subject: [PATCH 0765/2290] ASoC: amd: yc: Fix non-functional mic on Lenovo 82UU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f7fe85b229bc30cb5dc95b4e9015a601c9e3a8cd ] Like many other models, the Lenovo 82UU (Yoga Slim 7 Pro 14ARH7) needs a quirk entry for the internal microphone to function. Signed-off-by: Attila Tőkés Link: https://msgid.link/r/20240210193638.144028-1-attitokes@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 28da4e1858d7e..5921af7fd92c5 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -227,6 +227,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UU"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From f49c513f46dc19bf01ffad2aaaf234d7f37f6799 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 17:49:48 +0800 Subject: [PATCH 0766/2290] x86/xen: Add some null pointer checking to smp.c [ Upstream commit 3693bb4465e6e32a204a5b86d3ec7e6b9f7e67c2 ] kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401161119.iof6BQsf-lkp@intel.com/ Suggested-by: Markus Elfring Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240119094948.275390-1-chentao@kylinos.cn Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/smp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4b0d6fff88de5..1fb9a1644d944 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu) char *resched_name, *callfunc_name, *debug_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); + if (!resched_name) + goto fail_mem; per_cpu(xen_resched_irq, cpu).name = resched_name; rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, @@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu) per_cpu(xen_resched_irq, cpu).irq = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); + if (!callfunc_name) + goto fail_mem; per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, cpu, @@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu) if (!xen_fifo_events) { debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); + if (!debug_name) + goto fail_mem; + per_cpu(xen_debug_irq, cpu).name = debug_name; rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu) } callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); + if (!callfunc_name) + goto fail_mem; + per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, cpu, @@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu) return 0; + fail_mem: + rc = -ENOMEM; fail: xen_smp_intr_free(cpu); return rc; -- GitLab From 9a07188311af2d978e84cb91ff7e54eda01f2876 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:27 +0000 Subject: [PATCH 0767/2290] MIPS: Clear Cause.BD in instruction_pointer_set [ Upstream commit 9d6e21ddf20293b3880ae55b9d14de91c5891c59 ] Clear Cause.BD after we use instruction_pointer_set to override EPC. This can prevent exception_epc check against instruction code at new return address. It won't be considered as "in delay slot" after epc being overridden anyway. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/ptrace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index daf3cf244ea97..b3e4dd6be7e20 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -60,6 +60,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->cp0_epc = val; + regs->cp0_cause &= ~CAUSEF_BD; } /* Query offset/name of register from its name/offset */ -- GitLab From d1614e1fd6c363549311c45a38c321f1c5be5774 Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Sun, 11 Feb 2024 19:04:29 +0000 Subject: [PATCH 0768/2290] HID: multitouch: Add required quirk for Synaptics 0xcddc device [ Upstream commit 1741a8269e1c51fa08d4bfdf34667387a6eb10ec ] Add support for the pointing stick (Accupoint) and 2 mouse buttons. Present on some Toshiba/dynabook Portege X30 and X40 laptops. It should close https://bugzilla.kernel.org/show_bug.cgi?id=205817 Signed-off-by: Manuel Fombuena Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 5ec1f174127a3..3816fd06bc953 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2153,6 +2153,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xcddc) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, -- GitLab From 6ce8cc5e6251229d5239d141d0e5102a0ffa1c7f Mon Sep 17 00:00:00 2001 From: Andrew Ballance Date: Tue, 13 Feb 2024 19:23:05 -0600 Subject: [PATCH 0769/2290] gen_compile_commands: fix invalid escape sequence warning [ Upstream commit dae4a0171e25884787da32823b3081b4c2acebb2 ] With python 3.12, '\#' results in this warning SyntaxWarning: invalid escape sequence '\#' Signed-off-by: Andrew Ballance Reviewed-by: Justin Stitt Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/clang-tools/gen_compile_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index d800b2c0af977..4f414ab706bd8 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path): # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the # kernel version). The compile_commands.json file is not interepreted # by Make, so this code replaces the escaped version with '#'. - prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#') + prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#') # Use os.path.abspath() to normalize the path resolving '.' and '..' . abs_path = os.path.abspath(os.path.join(root_directory, file_path)) -- GitLab From 0e5b11ff7354021b77c1d4f8b5976c2f491d11c8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 18:24:38 +0000 Subject: [PATCH 0770/2290] arm64/sve: Lower the maximum allocation for the SVE ptrace regset [ Upstream commit 2813926261e436d33bc74486b51cce60b76edf78 ] Doug Anderson observed that ChromeOS crashes are being reported which include failing allocations of order 7 during core dumps due to ptrace allocating storage for regsets: chrome: page allocation failure: order:7, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=urgent,mems_allowed=0 ... regset_get_alloc+0x1c/0x28 elf_core_dump+0x3d8/0xd8c do_coredump+0xeb8/0x1378 with further investigation showing that this is: [ 66.957385] DOUG: Allocating 279584 bytes which is the maximum size of the SVE regset. As Doug observes it is not entirely surprising that such a large allocation of contiguous memory might fail on a long running system. The SVE regset is currently sized to hold SVE registers with a VQ of SVE_VQ_MAX which is 512, substantially more than the architectural maximum of 16 which we might see even in a system emulating the limits of the architecture. Since we don't expose the size we tell the regset core externally let's define ARCH_SVE_VQ_MAX with the actual architectural maximum and use that for the regset, we'll still overallocate most of the time but much less so which will be helpful even if the core is fixed to not require contiguous allocations. Specify ARCH_SVE_VQ_MAX in terms of the maximum value that can be written into ZCR_ELx.LEN (where this is set in the hardware). For consistency update the maximum SME vector length to be specified in the same style while we are at it. We could also teach the ptrace core about runtime discoverable regset sizes but that would be a more invasive change and this is being observed in practical systems. Reported-by: Doug Anderson Signed-off-by: Mark Brown Tested-by: Douglas Anderson Link: https://lore.kernel.org/r/20240213-arm64-sve-ptrace-regset-size-v2-1-c7600ca74b9b@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/fpsimd.h | 12 ++++++------ arch/arm64/kernel/ptrace.c | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index da18413712c04..930b0e6c94622 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -36,13 +36,13 @@ * When we defined the maximum SVE vector length we defined the ABI so * that the maximum vector length included all the reserved for future * expansion bits in ZCR rather than those just currently defined by - * the architecture. While SME follows a similar pattern the fact that - * it includes a square matrix means that any allocations that attempt - * to cover the maximum potential vector length (such as happen with - * the regset used for ptrace) end up being extremely large. Define - * the much lower actual limit for use in such situations. + * the architecture. Using this length to allocate worst size buffers + * results in excessively large allocations, and this effect is even + * more pronounced for SME due to ZA. Define more suitable VLs for + * these situations. */ -#define SME_VQ_MAX 16 +#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1) +#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1) struct task_struct; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e1f6366b7ccdf..d02dd2be17b3b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1450,7 +1450,8 @@ static const struct user_regset aarch64_regsets[] = { #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ .core_note_type = NT_ARM_SVE, - .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX, + SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, -- GitLab From 6d6aa6c0bfd653890372360c52f0ca8a566c8b3d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Jan 2024 16:04:14 +0100 Subject: [PATCH 0771/2290] soc: microchip: Fix POLARFIRE_SOC_SYS_CTRL input prompt [ Upstream commit 6dd9a236042e305d7b69ee92db7347bf5943e7d3 ] The symbol's prompt should be a one-line description, instead of just duplicating the symbol name. Signed-off-by: Geert Uytterhoeven Signed-off-by: Conor Dooley Signed-off-by: Sasha Levin --- drivers/soc/microchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig index eb656b33156ba..f19e74d342aa2 100644 --- a/drivers/soc/microchip/Kconfig +++ b/drivers/soc/microchip/Kconfig @@ -1,5 +1,5 @@ config POLARFIRE_SOC_SYS_CTRL - tristate "POLARFIRE_SOC_SYS_CTRL" + tristate "Microchip PolarFire SoC (MPFS) system controller support" depends on POLARFIRE_SOC_MAILBOX help This driver adds support for the PolarFire SoC (MPFS) system controller. -- GitLab From cad82f1671e41094acd3b9a60cd27d67a3c64a21 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Jan 2024 11:29:11 +0200 Subject: [PATCH 0772/2290] RDMA/mlx5: Fix fortify source warning while accessing Eth segment [ Upstream commit 4d5e86a56615cc387d21c629f9af8fb0e958d350 ] ------------[ cut here ]------------ memcpy: detected field-spanning write (size 56) of single field "eseg->inline_hdr.start" at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 (size 2) WARNING: CPU: 0 PID: 293779 at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Modules linked in: 8021q garp mrp stp llc rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) ib_core(OE) mlx5_core(OE) pci_hyperv_intf mlxdevm(OE) mlx_compat(OE) tls mlxfw(OE) psample nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink mst_pciconf(OE) knem(OE) vfio_pci vfio_pci_core vfio_iommu_type1 vfio iommufd irqbypass cuse nfsv3 nfs fscache netfs xfrm_user xfrm_algo ipmi_devintf ipmi_msghandler binfmt_misc crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 snd_pcsp aesni_intel crypto_simd cryptd snd_pcm snd_timer joydev snd soundcore input_leds serio_raw evbug nfsd auth_rpcgss nfs_acl lockd grace sch_fq_codel sunrpc drm efi_pstore ip_tables x_tables autofs4 psmouse virtio_net net_failover failover floppy [last unloaded: mlx_compat(OE)] CPU: 0 PID: 293779 Comm: ssh Tainted: G OE 6.2.0-32-generic #32~22.04.1-Ubuntu Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Code: 0c 01 00 a8 01 75 25 48 8b 75 a0 b9 02 00 00 00 48 c7 c2 10 5b fd c0 48 c7 c7 80 5b fd c0 c6 05 57 0c 03 00 01 e8 95 4d 93 da <0f> 0b 44 8b 4d b0 4c 8b 45 c8 48 8b 4d c0 e9 49 fb ff ff 41 0f b7 RSP: 0018:ffffb5b48478b570 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb5b48478b628 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffb5b48478b5e8 R13: ffff963a3c609b5e R14: ffff9639c3fbd800 R15: ffffb5b480475a80 FS: 00007fc03b444c80(0000) GS:ffff963a3dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556f46bdf000 CR3: 0000000006ac6003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0x72/0x90 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? __warn+0x8d/0x160 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? report_bug+0x1bb/0x1d0 ? handle_bug+0x46/0x90 ? exc_invalid_op+0x19/0x80 ? asm_exc_invalid_op+0x1b/0x20 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] mlx5_ib_post_send_nodrain+0xb/0x20 [mlx5_ib] ipoib_send+0x2ec/0x770 [ib_ipoib] ipoib_start_xmit+0x5a0/0x770 [ib_ipoib] dev_hard_start_xmit+0x8e/0x1e0 ? validate_xmit_skb_list+0x4d/0x80 sch_direct_xmit+0x116/0x3a0 __dev_xmit_skb+0x1fd/0x580 __dev_queue_xmit+0x284/0x6b0 ? _raw_spin_unlock_irq+0xe/0x50 ? __flush_work.isra.0+0x20d/0x370 ? push_pseudo_header+0x17/0x40 [ib_ipoib] neigh_connected_output+0xcd/0x110 ip_finish_output2+0x179/0x480 ? __smp_call_single_queue+0x61/0xa0 __ip_finish_output+0xc3/0x190 ip_finish_output+0x2e/0xf0 ip_output+0x78/0x110 ? __pfx_ip_finish_output+0x10/0x10 ip_local_out+0x64/0x70 __ip_queue_xmit+0x18a/0x460 ip_queue_xmit+0x15/0x30 __tcp_transmit_skb+0x914/0x9c0 tcp_write_xmit+0x334/0x8d0 tcp_push_one+0x3c/0x60 tcp_sendmsg_locked+0x2e1/0xac0 tcp_sendmsg+0x2d/0x50 inet_sendmsg+0x43/0x90 sock_sendmsg+0x68/0x80 sock_write_iter+0x93/0x100 vfs_write+0x326/0x3c0 ksys_write+0xbd/0xf0 ? do_syscall_64+0x69/0x90 __x64_sys_write+0x19/0x30 do_syscall_64+0x59/0x90 ? do_user_addr_fault+0x1d0/0x640 ? exit_to_user_mode_prepare+0x3b/0xd0 ? irqentry_exit_to_user_mode+0x9/0x20 ? irqentry_exit+0x43/0x50 ? exc_page_fault+0x92/0x1b0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc03ad14a37 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdf8697fe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000008024 RCX: 00007fc03ad14a37 RDX: 0000000000008024 RSI: 0000556f46bd8270 RDI: 0000000000000003 RBP: 0000556f46bb1800 R08: 0000000000007fe3 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000556f46bc66b0 R14: 000000000000000a R15: 0000556f46bb2f50 ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/r/8228ad34bd1a25047586270f7b1fb4ddcd046282.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/wr.c | 2 +- include/linux/mlx5/qp.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 855f3f4fefadd..737db67a9ce1d 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); - memcpy(eseg->inline_hdr.start, pdata, copysz); + memcpy(eseg->inline_hdr.data, pdata, copysz); stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - sizeof(eseg->inline_hdr.start) + copysz, 16); *size += stride / 16; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4657d5c54abef..ca0eee571ad7b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; -- GitLab From 6d4c7bd6dc35427b76b16bd69e5552af53c11d5b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Jan 2024 11:29:13 +0200 Subject: [PATCH 0773/2290] RDMA/mlx5: Relax DEVX access upon modify commands [ Upstream commit be551ee1574280ef8afbf7c271212ac3e38933ef ] Relax DEVX access upon modify commands to be UVERBS_ACCESS_READ. The kernel doesn't need to protect what firmware protects, or what causes no damage to anyone but the user. As firmware needs to protect itself from parallel access to the same object, don't block parallel modify/query commands on the same object in the kernel side. This change will allow user space application to run parallel updates to different entries in the same bulk object. Tested-by: Tamar Mashiah Signed-off-by: Yishai Hadas Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/7407d5ed35dc427c1097699e12b49c01e1073406.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f8e2baed27a5c..7013ce20549bd 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2951,7 +2951,7 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, UVERBS_IDR_ANY_OBJECT, - UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, -- GitLab From d35d346b5df607bb612fd587561ec8b7917be6e3 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 13 Feb 2024 19:45:40 +0000 Subject: [PATCH 0774/2290] riscv: dts: sifive: add missing #interrupt-cells to pmic [ Upstream commit ce6b6d1513965f500a05f3facf223fa01fd74920 ] At W=2 dtc complains: hifive-unmatched-a00.dts:120.10-238.4: Warning (interrupt_provider): /soc/i2c@10030000/pmic@58: Missing '#interrupt-cells' in interrupt provider Add the missing property. Reviewed-by: Samuel Holland Signed-off-by: Conor Dooley Signed-off-by: Sasha Levin --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 07387f9c135ca..72b87b08ab444 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -123,6 +123,7 @@ interrupt-parent = <&gpio>; interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; -- GitLab From 46c8615de5bf7ee2076750bd3024e5912d7ee4eb Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:33 +0800 Subject: [PATCH 0775/2290] x86/mm: Move is_vsyscall_vaddr() into asm/vsyscall.h [ Upstream commit ee0e39a63b78849f8abbef268b13e4838569f646 ] Move is_vsyscall_vaddr() into asm/vsyscall.h to make it available for copy_from_kernel_nofault_allowed() in arch/x86/mm/maccess.c. Reviewed-by: Sohil Mehta Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20240202103935.3154011-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/x86/include/asm/vsyscall.h | 10 ++++++++++ arch/x86/mm/fault.c | 9 --------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index ab60a71a8dcb9..472f0263dbc61 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); @@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, } #endif +/* + * The (legacy) vsyscall page is the long page in the kernel portion + * of the address space that has user-accessible permissions. + */ +static inline bool is_vsyscall_vaddr(unsigned long vaddr) +{ + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); +} + #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbbad73192a1..f20636510eb1e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -818,15 +818,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) -- GitLab From f175de546a3eb77614d94d4c02550181c0a8493e Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:34 +0800 Subject: [PATCH 0776/2290] x86/mm: Disallow vsyscall page read for copy_from_kernel_nofault() [ Upstream commit 32019c659ecfe1d92e3bf9fcdfbb11a7c70acd58 ] When trying to use copy_from_kernel_nofault() to read vsyscall page through a bpf program, the following oops was reported: BUG: unable to handle page fault for address: ffffffffff600000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 3231067 P4D 3231067 PUD 3233067 PMD 3235067 PTE 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 20390 Comm: test_progs ...... 6.7.0+ #58 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:copy_from_kernel_nofault+0x6f/0x110 ...... Call Trace: ? copy_from_kernel_nofault+0x6f/0x110 bpf_probe_read_kernel+0x1d/0x50 bpf_prog_2061065e56845f08_do_probe_read+0x51/0x8d trace_call_bpf+0xc5/0x1c0 perf_call_bpf_enter.isra.0+0x69/0xb0 perf_syscall_enter+0x13e/0x200 syscall_trace_enter+0x188/0x1c0 do_syscall_64+0xb5/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 ...... ---[ end trace 0000000000000000 ]--- The oops is triggered when: 1) A bpf program uses bpf_probe_read_kernel() to read from the vsyscall page and invokes copy_from_kernel_nofault() which in turn calls __get_user_asm(). 2) Because the vsyscall page address is not readable from kernel space, a page fault exception is triggered accordingly. 3) handle_page_fault() considers the vsyscall page address as a user space address instead of a kernel space address. This results in the fix-up setup by bpf not being applied and a page_fault_oops() is invoked due to SMAP. Considering handle_page_fault() has already considered the vsyscall page address as a userspace address, fix the problem by disallowing vsyscall page read for copy_from_kernel_nofault(). Originally-by: Thomas Gleixner Reported-by: syzbot+72aa0161922eba61b50e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAG48ez06TZft=ATH1qh2c5mpS5BT8UakwNkzi6nvK5_djC-4Nw@mail.gmail.com Reported-by: xingwei lee Closes: https://lore.kernel.org/bpf/CABOYnLynjBoFZOf3Z4BhaZkc5hx_kHfsjiW+UWLoB=w33LvScw@mail.gmail.com Signed-off-by: Hou Tao Reviewed-by: Sohil Mehta Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240202103935.3154011-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/x86/mm/maccess.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 6993f026adec9..42115ac079cfe 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -3,6 +3,8 @@ #include #include +#include + #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { @@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) return false; + /* + * Reading from the vsyscall page may cause an unhandled fault in + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is + * usually considered as a user space address. + */ + if (is_vsyscall_vaddr(vaddr)) + return false; + /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early -- GitLab From 05896c8ff4ed16ce622169c072046f8b86d8cde7 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 14 Feb 2024 17:32:40 +0100 Subject: [PATCH 0777/2290] net/iucv: fix the allocation size of iucv_path_table array [ Upstream commit b4ea9b6a18ebf7f9f3a7a60f82e925186978cfcf ] iucv_path_table is a dynamically allocated array of pointers to struct iucv_path items. Yet, its size is calculated as if it was an array of struct iucv_path items. Signed-off-by: Alexander Gordeev Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/iucv/iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fc3fddeb6f36d..f66b5f74cd83a 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID"; static LIST_HEAD(iucv_handler_list); /* - * iucv_path_table: an array of iucv_path structures. + * iucv_path_table: array of pointers to iucv_path structures. */ static struct iucv_path **iucv_path_table; static unsigned long iucv_max_pathid; @@ -544,7 +544,7 @@ static int iucv_enable(void) cpus_read_lock(); rc = -ENOMEM; - alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + alloc_size = iucv_max_pathid * sizeof(*iucv_path_table); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); if (!iucv_path_table) goto out; -- GitLab From 4492f21263186a84237f3167aed7b0cb455869c5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Sun, 11 Feb 2024 23:43:14 +0100 Subject: [PATCH 0778/2290] parisc/ftrace: add missing CONFIG_DYNAMIC_FTRACE check [ Upstream commit 250f5402e636a5cec9e0e95df252c3d54307210f ] Fixes a bug revealed by -Wmissing-prototypes when CONFIG_FUNCTION_GRAPH_TRACER is enabled but not CONFIG_DYNAMIC_FTRACE: arch/parisc/kernel/ftrace.c:82:5: error: no previous prototype for 'ftrace_enable_ftrace_graph_caller' [-Werror=missing-prototypes] 82 | int ftrace_enable_ftrace_graph_caller(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/parisc/kernel/ftrace.c:88:5: error: no previous prototype for 'ftrace_disable_ftrace_graph_caller' [-Werror=missing-prototypes] 88 | int ftrace_disable_ftrace_graph_caller(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Max Kellermann Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 4d392e4ed3584..ac7253891d5ed 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -78,7 +78,7 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER) int ftrace_enable_ftrace_graph_caller(void) { static_key_enable(&ftrace_graph_enable.key); -- GitLab From 6fb80b3e75b5ab85ca7eeb1e5cba33b5f1d2d0db Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Fri, 16 Feb 2024 15:04:17 -0600 Subject: [PATCH 0779/2290] block: sed-opal: handle empty atoms when parsing response [ Upstream commit 5429c8de56f6b2bd8f537df3a1e04e67b9c04282 ] The SED Opal response parsing function response_parse() does not handle the case of an empty atom in the response. This causes the entry count to be too high and the response fails to be parsed. Recognizing, but ignoring, empty atoms allows response handling to succeed. Signed-off-by: Greg Joyce Link: https://lore.kernel.org/r/20240216210417.3526064-2-gjoyce@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/opal_proto.h | 1 + block/sed-opal.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/block/opal_proto.h b/block/opal_proto.h index 7152aa1f1a49e..7f306b08a0fe7 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -71,6 +71,7 @@ enum opal_response_token { #define SHORT_ATOM_BYTE 0xBF #define MEDIUM_ATOM_BYTE 0xDF #define LONG_ATOM_BYTE 0xE3 +#define EMPTY_ATOM_BYTE 0xFF #define OPAL_INVAL_PARAM 12 #define OPAL_MANUFACTURED_INACTIVE 0x08 diff --git a/block/sed-opal.c b/block/sed-opal.c index 9bdb833e5817d..25e4ce452c1d3 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -935,16 +935,20 @@ static int response_parse(const u8 *buf, size_t length, token_length = response_parse_medium(iter, pos); else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ token_length = response_parse_long(iter, pos); + else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */ + token_length = 1; else /* TOKEN */ token_length = response_parse_token(iter, pos); if (token_length < 0) return token_length; + if (pos[0] != EMPTY_ATOM_BYTE) + num_entries++; + pos += token_length; total -= token_length; iter++; - num_entries++; } resp->num = num_entries; -- GitLab From b22b54f247f601a0896092c779d4c6bb80038475 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 20 Feb 2024 19:11:51 +0100 Subject: [PATCH 0780/2290] dm-verity, dm-crypt: align "struct bvec_iter" correctly [ Upstream commit 787f1b2800464aa277236a66eb3c279535edd460 ] "struct bvec_iter" is defined with the __packed attribute, so it is aligned on a single byte. On X86 (and on other architectures that support unaligned addresses in hardware), "struct bvec_iter" is accessed using the 8-byte and 4-byte memory instructions, however these instructions are less efficient if they operate on unaligned addresses. (on RISC machines that don't have unaligned access in hardware, GCC generates byte-by-byte accesses that are very inefficient - see [1]) This commit reorders the entries in "struct dm_verity_io" and "struct convert_context", so that "struct bvec_iter" is aligned on 8 bytes. [1] https://lore.kernel.org/all/ZcLuWUNRZadJr0tQ@fedora/T/ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-crypt.c | 4 ++-- drivers/md/dm-verity.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3e215aa85b99a..e8c534b5870ac 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -52,11 +52,11 @@ struct convert_context { struct completion restart; struct bio *bio_in; - struct bio *bio_out; struct bvec_iter iter_in; + struct bio *bio_out; struct bvec_iter iter_out; - u64 cc_sector; atomic_t cc_pending; + u64 cc_sector; union { struct skcipher_request *req; struct aead_request *req_aead; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 4620a98c99561..db93a91169d5e 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -80,12 +80,12 @@ struct dm_verity_io { /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; + struct bvec_iter iter; + sector_t block; unsigned int n_blocks; bool in_tasklet; - struct bvec_iter iter; - struct work_struct work; char *recheck_buffer; -- GitLab From 53983d354bd7abf0605a85e28e1325a82d35d455 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:27 -0600 Subject: [PATCH 0781/2290] arm64: dts: Fix dtc interrupt_provider warnings [ Upstream commit 91adecf911e5df78ea3e8f866e69db2c33416a5c ] The dtc interrupt_provider warning is off by default. Fix all the warnings so it can be enabled. Signed-off-by: Rob Herring Reviewed-By: AngeloGioacchino Del Regno # Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Acked-by: Florian Fainelli #Broadcom Acked-by: Chanho Min Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-3-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amazon/alpine-v2.dtsi | 1 - arch/arm64/boot/dts/amazon/alpine-v3.dtsi | 1 - arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 1 + arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 1 + arch/arm64/boot/dts/lg/lg1312.dtsi | 1 - arch/arm64/boot/dts/lg/lg1313.dtsi | 1 - arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 1 - arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 1 + arch/arm64/boot/dts/renesas/ulcb-kf.dtsi | 4 ++++ 9 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index 4eb2cd14e00b0..9b6da84deae7a 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -145,7 +145,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <160>; al,msi-num-spis = <160>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 73a352ea8fd5c..b30014d4dc29c 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -351,7 +351,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <336>; al,msi-num-spis = <959>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index fda97c47f4e97..d5778417455c0 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -584,6 +584,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 8f8c25e51194d..473d7d0ddf369 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -442,6 +442,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; gpio-ranges = <&pinmux 0 0 16>, <&pinmux 16 71 2>, diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 78ae73d0cf365..98ff17b14b2a5 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -124,7 +124,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 2173316573bee..8e9410d8f46c0 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -124,7 +124,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index a06a0a889c43f..73d8803b54d8b 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -133,7 +133,6 @@ odmi: odmi@300000 { compatible = "marvell,odmi-controller"; - interrupt-controller; msi-controller; marvell,odmi-frames = <4>; reg = <0x300000 0x4000>, diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 5117b2e7985af..998c2e78168a6 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -111,6 +111,7 @@ compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; + #interrupt-cells = <1>; interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi index 588b14b66b6fb..f37abfc13fe59 100644 --- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi @@ -251,6 +251,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; @@ -311,6 +312,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; }; @@ -331,6 +333,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio7>; interrupts = <3 IRQ_TYPE_EDGE_FALLING>; }; @@ -341,6 +344,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio5>; interrupts = <9 IRQ_TYPE_EDGE_FALLING>; }; -- GitLab From 995e91c9556c8fc6028b474145a36e947d1eb6b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Feb 2024 19:41:23 +0000 Subject: [PATCH 0782/2290] btrfs: fix data races when accessing the reserved amount of block reserves [ Upstream commit e06cc89475eddc1f3a7a4d471524256152c68166 ] At space_info.c we have several places where we access the ->reserved field of a block reserve without taking the block reserve's spinlock first, which makes KCSAN warn about a data race since that field is always updated while holding the spinlock. The reports from KCSAN are like the following: [117.193526] BUG: KCSAN: data-race in btrfs_block_rsv_release [btrfs] / need_preemptive_reclaim [btrfs] [117.195148] read to 0x000000017f587190 of 8 bytes by task 6303 on cpu 3: [117.195172] need_preemptive_reclaim+0x222/0x2f0 [btrfs] [117.195992] __reserve_bytes+0xbb0/0xdc8 [btrfs] [117.196807] btrfs_reserve_metadata_bytes+0x4c/0x120 [btrfs] [117.197620] btrfs_block_rsv_add+0x78/0xa8 [btrfs] [117.198434] btrfs_delayed_update_inode+0x154/0x368 [btrfs] [117.199300] btrfs_update_inode+0x108/0x1c8 [btrfs] [117.200122] btrfs_dirty_inode+0xb4/0x140 [btrfs] [117.200937] btrfs_update_time+0x8c/0xb0 [btrfs] [117.201754] touch_atime+0x16c/0x1e0 [117.201789] filemap_read+0x674/0x728 [117.201823] btrfs_file_read_iter+0xf8/0x410 [btrfs] [117.202653] vfs_read+0x2b6/0x498 [117.203454] ksys_read+0xa2/0x150 [117.203473] __s390x_sys_read+0x68/0x88 [117.203495] do_syscall+0x1c6/0x210 [117.203517] __do_syscall+0xc8/0xf0 [117.203539] system_call+0x70/0x98 [117.203579] write to 0x000000017f587190 of 8 bytes by task 11 on cpu 0: [117.203604] btrfs_block_rsv_release+0x2e8/0x578 [btrfs] [117.204432] btrfs_delayed_inode_release_metadata+0x7c/0x1d0 [btrfs] [117.205259] __btrfs_update_delayed_inode+0x37c/0x5e0 [btrfs] [117.206093] btrfs_async_run_delayed_root+0x356/0x498 [btrfs] [117.206917] btrfs_work_helper+0x160/0x7a0 [btrfs] [117.207738] process_one_work+0x3b6/0x838 [117.207768] worker_thread+0x75e/0xb10 [117.207797] kthread+0x21a/0x230 [117.207830] __ret_from_fork+0x6c/0xb8 [117.207861] ret_from_fork+0xa/0x30 So add a helper to get the reserved amount of a block reserve while holding the lock. The value may be not be up to date anymore when used by need_preemptive_reclaim() and btrfs_preempt_reclaim_metadata_space(), but that's ok since the worst it can do is cause more reclaim work do be done sooner rather than later. Reading the field while holding the lock instead of using the data_race() annotation is used in order to prevent load tearing. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-rsv.h | 16 ++++++++++++++++ fs/btrfs/space-info.c | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index 578c3497a455c..cda79d3e0c263 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -101,4 +101,20 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) return data_race(rsv->full); } +/* + * Get the reserved mount of a block reserve in a context where getting a stale + * value is acceptable, instead of accessing it directly and trigger data race + * warning from KCSAN. + */ +static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->reserved; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 2635fb4bffa06..8b75f436a9a3c 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -847,7 +847,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info) { - u64 global_rsv_size = fs_info->global_block_rsv.reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); u64 ordered, delalloc; u64 total = writable_total_bytes(fs_info, space_info); u64 thresh; @@ -948,8 +948,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1; delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes); if (ordered >= delalloc) - used += fs_info->delayed_refs_rsv.reserved + - fs_info->delayed_block_rsv.reserved; + used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) + + btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv); else used += space_info->bytes_may_use - global_rsv_size; @@ -1164,7 +1164,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) enum btrfs_flush_state flush; u64 delalloc_size = 0; u64 to_reclaim, block_rsv_size; - u64 global_rsv_size = global_rsv->reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); loops++; @@ -1176,9 +1176,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * assume it's tied up in delalloc reservations. */ block_rsv_size = global_rsv_size + - delayed_block_rsv->reserved + - delayed_refs_rsv->reserved + - trans_rsv->reserved; + btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv) + + btrfs_block_rsv_reserved(trans_rsv); if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; @@ -1198,16 +1198,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) to_reclaim = delalloc_size; flush = FLUSH_DELALLOC; } else if (space_info->bytes_pinned > - (delayed_block_rsv->reserved + - delayed_refs_rsv->reserved)) { + (btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv))) { to_reclaim = space_info->bytes_pinned; flush = COMMIT_TRANS; - } else if (delayed_block_rsv->reserved > - delayed_refs_rsv->reserved) { - to_reclaim = delayed_block_rsv->reserved; + } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > + btrfs_block_rsv_reserved(delayed_refs_rsv)) { + to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv); flush = FLUSH_DELAYED_ITEMS_NR; } else { - to_reclaim = delayed_refs_rsv->reserved; + to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv); flush = FLUSH_DELAYED_REFS_NR; } -- GitLab From ab1be3f1aa7799f99155488c28eacaef65eb68fb Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Feb 2024 20:10:07 +0000 Subject: [PATCH 0783/2290] btrfs: fix data race at btrfs_use_block_rsv() when accessing block reserve [ Upstream commit c7bb26b847e5b97814f522686068c5628e2b3646 ] At btrfs_use_block_rsv() we read the size of a block reserve without locking its spinlock, which makes KCSAN complain because the size of a block reserve is always updated while holding its spinlock. The report from KCSAN is the following: [653.313148] BUG: KCSAN: data-race in btrfs_update_delayed_refs_rsv [btrfs] / btrfs_use_block_rsv [btrfs] [653.314755] read to 0x000000017f5871b8 of 8 bytes by task 7519 on cpu 0: [653.314779] btrfs_use_block_rsv+0xe4/0x2f8 [btrfs] [653.315606] btrfs_alloc_tree_block+0xdc/0x998 [btrfs] [653.316421] btrfs_force_cow_block+0x220/0xe38 [btrfs] [653.317242] btrfs_cow_block+0x1ac/0x568 [btrfs] [653.318060] btrfs_search_slot+0xda2/0x19b8 [btrfs] [653.318879] btrfs_del_csums+0x1dc/0x798 [btrfs] [653.319702] __btrfs_free_extent.isra.0+0xc24/0x2028 [btrfs] [653.320538] __btrfs_run_delayed_refs+0xd3c/0x2390 [btrfs] [653.321340] btrfs_run_delayed_refs+0xae/0x290 [btrfs] [653.322140] flush_space+0x5e4/0x718 [btrfs] [653.322958] btrfs_preempt_reclaim_metadata_space+0x102/0x2f8 [btrfs] [653.323781] process_one_work+0x3b6/0x838 [653.323800] worker_thread+0x75e/0xb10 [653.323817] kthread+0x21a/0x230 [653.323836] __ret_from_fork+0x6c/0xb8 [653.323855] ret_from_fork+0xa/0x30 [653.323887] write to 0x000000017f5871b8 of 8 bytes by task 576 on cpu 3: [653.323906] btrfs_update_delayed_refs_rsv+0x1a4/0x250 [btrfs] [653.324699] btrfs_add_delayed_data_ref+0x468/0x6d8 [btrfs] [653.325494] btrfs_free_extent+0x76/0x120 [btrfs] [653.326280] __btrfs_mod_ref+0x6a8/0x6b8 [btrfs] [653.327064] btrfs_dec_ref+0x50/0x70 [btrfs] [653.327849] walk_up_proc+0x236/0xa50 [btrfs] [653.328633] walk_up_tree+0x21c/0x448 [btrfs] [653.329418] btrfs_drop_snapshot+0x802/0x1328 [btrfs] [653.330205] btrfs_clean_one_deleted_snapshot+0x184/0x238 [btrfs] [653.330995] cleaner_kthread+0x2b0/0x2f0 [btrfs] [653.331781] kthread+0x21a/0x230 [653.331800] __ret_from_fork+0x6c/0xb8 [653.331818] ret_from_fork+0xa/0x30 So add a helper to get the size of a block reserve while holding the lock. Reading the field while holding the lock instead of using the data_race() annotation is used in order to prevent load tearing. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-rsv.c | 2 +- fs/btrfs/block-rsv.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 507b44d18572d..4cbf386166209 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -512,7 +512,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); - if (unlikely(block_rsv->size == 0)) + if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index cda79d3e0c263..df87c4949d065 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -117,4 +117,20 @@ static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) return ret; } +/* + * Get the size of a block reserve in a context where getting a stale value is + * acceptable, instead of accessing it directly and trigger data race warning + * from KCSAN. + */ +static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->size; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ -- GitLab From 80656ee2baa7d7b86834c6de2ce8ca19e403edfc Mon Sep 17 00:00:00 2001 From: Andre Werner Date: Mon, 19 Feb 2024 06:33:32 +0100 Subject: [PATCH 0784/2290] net: smsc95xx: add support for SYS TEC USB-SPEmodule1 [ Upstream commit 45532b21dc2a692444b6ad5f71c253cca53e8103 ] This patch adds support for the SYS TEC USB-SPEmodule1 10Base-T1L ethernet device to the existing smsc95xx driver by adding the new USB VID/PID pair. Signed-off-by: Andre Werner Link: https://lore.kernel.org/r/20240219053413.4732-1-andre.werner@systec-electronic.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index a530f20ee2575..2fa46baa589e5 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2104,6 +2104,11 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0424, 0x9E08), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */ + USB_DEVICE(0x0878, 0x1400), + .driver_info = (unsigned long)&smsc95xx_info, + }, { /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ USB_DEVICE(0x184F, 0x0051), -- GitLab From 50fbd3a7210f932ae6b35a31a005217f5b26677c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 21 Feb 2024 15:05:35 +0100 Subject: [PATCH 0785/2290] wifi: mac80211: only call drv_sta_rc_update for uploaded stations [ Upstream commit 413dafc8170fcb925fb17af8842f06af305f8e0b ] When a station has not been uploaded yet, receiving SMPS or channel width notification action frames can lead to rate_control_rate_update calling drv_sta_rc_update with uninitialized driver private data. Fix this by adding a missing check for sta->uploaded. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240221140535.16102-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d5ea5f5bcf3a0..9d33fd2377c88 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local, rcu_read_unlock(); } - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + if (sta->uploaded) + drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) -- GitLab From 008985fd35a7aa284177f69856206fcf0d0bd129 Mon Sep 17 00:00:00 2001 From: Johnny Hsieh Date: Mon, 26 Feb 2024 21:44:50 +0800 Subject: [PATCH 0786/2290] ASoC: amd: yc: Add Lenovo ThinkBook 21J0 into DMI quirk table [ Upstream commit 50ee641643dd0f46702e9a99354398196e1734c2 ] This patch adds Lenovo 21J0 (ThinkBook 16 G5+ ARP) to the DMI quirks table to enable internal microphone array. Cc: linux-sound@vger.kernel.org Signed-off-by: Johnny Hsieh Link: https://msgid.link/r/TYSPR04MB8429D62DFDB6727866ECF1DEC55A2@TYSPR04MB8429.apcprd04.prod.outlook.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 5921af7fd92c5..0568e64d10150 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J0"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From b0e50fa810e805fb48bbabfc38096194eb5591f7 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Wed, 21 Feb 2024 12:47:24 +0530 Subject: [PATCH 0787/2290] scsi: mpt3sas: Prevent sending diag_reset when the controller is ready [ Upstream commit ee0017c3ed8a8abfa4d40e42f908fb38c31e7515 ] If the driver detects that the controller is not ready before sending the first IOC facts command, it will wait for a maximum of 10 seconds for it to become ready. However, even if the controller becomes ready within 10 seconds, the driver will still issue a diagnostic reset. Modify the driver to avoid sending a diag reset if the controller becomes ready within the 10-second wait time. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240221071724.14986-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 809be43f440dc..8e6ac08e553bb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7398,7 +7398,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout) return -EFAULT; } - issue_diag_reset: + return 0; + +issue_diag_reset: rc = _base_diag_reset(ioc); return rc; } -- GitLab From 5e61a994b2280f0dfc1ff2465d763460fd944bc1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 23 Feb 2024 14:54:34 +0800 Subject: [PATCH 0788/2290] ALSA: hda/realtek - ALC285 reduce pop noise from Headphone port [ Upstream commit b34bf65838f7c6e785f62681605a538b73c2808c ] It had pop noise from Headphone port when system reboot state. If NID 58h Index 0x0 to fill default value, it will reduce pop noise. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/7493e207919a4fb3a0599324fd010e3e@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 75bd7b2fa4ee6..ede3f8b273d79 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3681,6 +3681,7 @@ static void alc285_hp_init(struct hda_codec *codec) int i, val; int coef38, coef0d, coef36; + alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */ alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */ coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */ coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */ -- GitLab From 05c7c2d198a0143ef660f98eac2fc5086e644551 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 22 Feb 2024 20:56:59 +0800 Subject: [PATCH 0789/2290] drm/amdgpu: Enable gpu reset for S3 abort cases on Raven series [ Upstream commit c671ec01311b4744b377f98b0b4c6d033fe569b3 ] Currently, GPU resets can now be performed successfully on the Raven series. While GPU reset is required for the S3 suspend abort case. So now can enable gpu reset for S3 abort cases on the Raven series. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc15.c | 45 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 489c89465c78b..c373a2a3248eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,11 +584,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1285,24 +1308,6 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } -static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) -{ - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. - */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) - return true; - - return false; -} - static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; -- GitLab From 0e3732d1df3550ac9fc0fbf4d1acbd2901514605 Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Wed, 28 Feb 2024 15:39:14 +0800 Subject: [PATCH 0790/2290] ASoC: amd: yc: Fix non-functional mic on Lenovo 21J2 [ Upstream commit ed00a6945dc32462c2d3744a3518d2316da66fcc ] Like many other models, the Lenovo 21J2 (ThinkBook 16 G5+ APO) needs a quirk entry for the internal microphone to function. Signed-off-by: Jiawei Wang Link: https://msgid.link/r/20240228073914.232204-2-me@jwang.link Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 0568e64d10150..e0f406b6646ba 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J2"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From 567c0411dc3b424fc7bd1e6109726d7ba32d4f73 Mon Sep 17 00:00:00 2001 From: Yuxuan Hu <20373622@buaa.edu.cn> Date: Wed, 3 Jan 2024 17:10:43 +0800 Subject: [PATCH 0791/2290] Bluetooth: rfcomm: Fix null-ptr-deref in rfcomm_check_security [ Upstream commit 2535b848fa0f42ddff3e5255cf5e742c9b77bb26 ] During our fuzz testing of the connection and disconnection process at the RFCOMM layer, we discovered this bug. By comparing the packets from a normal connection and disconnection process with the testcase that triggered a KASAN report. We analyzed the cause of this bug as follows: 1. In the packets captured during a normal connection, the host sends a `Read Encryption Key Size` type of `HCI_CMD` packet (Command Opcode: 0x1408) to the controller to inquire the length of encryption key.After receiving this packet, the controller immediately replies with a Command Completepacket (Event Code: 0x0e) to return the Encryption Key Size. 2. In our fuzz test case, the timing of the controller's response to this packet was delayed to an unexpected point: after the RFCOMM and L2CAP layers had disconnected but before the HCI layer had disconnected. 3. After receiving the Encryption Key Size Response at the time described in point 2, the host still called the rfcomm_check_security function. However, by this time `struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;` had already been released, and when the function executed `return hci_conn_security(conn->hcon, d->sec_level, auth_type, d->out);`, specifically when accessing `conn->hcon`, a null-ptr-deref error occurred. To fix this bug, check if `sk->sk_state` is BT_CLOSED before calling rfcomm_recv_frame in rfcomm_process_rx. Signed-off-by: Yuxuan Hu <20373622@buaa.edu.cn> Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 8d6fce9005bdd..4f54c7df3a94f 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1937,7 +1937,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) { + if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) { s = rfcomm_recv_frame(s, skb); if (!s) break; -- GitLab From 2f3ce8fcbf52c4f686a3762e20982ce068dace54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 22 Jan 2024 17:59:55 +0100 Subject: [PATCH 0792/2290] Bluetooth: mgmt: Fix limited discoverable off timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0bd1fb586235224048c726922db048d1bce6354a ] LIMITED_DISCOVERABLE flag is not reset from Class of Device and advertisement on limited discoverable timeout. This prevents to pass PTS test GAP/DISC/LIMM/BV-02-C Calling set_discoverable_sync as when the limited discovery is set correctly update the Class of Device and advertisement. Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6d631a2e60166..ab63f807e3c80 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1039,6 +1039,8 @@ static void rpa_expired(struct work_struct *work) hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } +static int set_discoverable_sync(struct hci_dev *hdev, void *data); + static void discov_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1057,7 +1059,7 @@ static void discov_off(struct work_struct *work) hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; - hci_update_discoverable(hdev); + hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL); mgmt_new_settings(hdev); -- GitLab From 72dbf660e01a24a4205df2cffffb4e2e6ea5321e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 29 Feb 2024 22:17:37 +0900 Subject: [PATCH 0793/2290] firewire: core: use long bus reset on gap count error [ Upstream commit d0b06dc48fb15902d7da09c5c0861e7f042a9381 ] When resetting the bus after a gap count error, use a long rather than short bus reset. IEEE 1394-1995 uses only long bus resets. IEEE 1394a adds the option of short bus resets. When video or audio transmission is in progress and a device is hot-plugged elsewhere on the bus, the resulting bus reset can cause video frame drops or audio dropouts. Short bus resets reduce or eliminate this problem. Accordingly, short bus resets are almost always preferred. However, on a mixed 1394/1394a bus, a short bus reset can trigger an immediate additional bus reset. This double bus reset can be interpreted differently by different nodes on the bus, resulting in an inconsistent gap count after the bus reset. An inconsistent gap count will cause another bus reset, leading to a neverending bus reset loop. This only happens for some bus topologies, not for all mixed 1394/1394a buses. By instead sending a long bus reset after a gap count inconsistency, we avoid the doubled bus reset, restoring the bus to normal operation. Signed-off-by: Adam Goldman Link: https://sourceforge.net/p/linux1394/mailman/message/58741624/ Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/core-card.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 8aaa7fcb2630d..401a77e3b5fa8 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -500,7 +500,19 @@ static void bm_work(struct work_struct *work) fw_notice(card, "phy config: new root=%x, gap_count=%d\n", new_root_id, gap_count); fw_send_phy_config(card, new_root_id, generation, gap_count); - reset_bus(card, true); + /* + * Where possible, use a short bus reset to minimize + * disruption to isochronous transfers. But in the event + * of a gap count inconsistency, use a long bus reset. + * + * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus + * may set different gap counts after a bus reset. On a mixed + * 1394/1394a bus, a short bus reset can get doubled. Some + * nodes may treat the double reset as one bus reset and others + * may treat it as two, causing a gap count inconsistency + * again. Using a long bus reset prevents this. + */ + reset_bus(card, card->gap_count != 0); /* Will allocate broadcast channel after the reset. */ goto out; } -- GitLab From 9a4fb2bdee2f72b1f33b5bf96b4a51e5692878b8 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 2 Feb 2024 11:08:12 +0100 Subject: [PATCH 0794/2290] arm64: tegra: Set the correct PHY mode for MGBE [ Upstream commit 4c892121d43bc2b45896ca207b54f39a8fa6b852 ] The PHY is configured in 10GBASE-R, so make sure to reflect that in DT. Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts index f094011be9ed9..8099dc04ed2e1 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts @@ -2024,7 +2024,7 @@ status = "okay"; phy-handle = <&mgbe0_phy>; - phy-mode = "usxgmii"; + phy-mode = "10gbase-r"; mdio { #address-cells = <1>; -- GitLab From 374709a7e541ca3e910f93e6ff24fe819923fc63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alban=20Boy=C3=A9?= Date: Wed, 28 Feb 2024 19:28:41 +0000 Subject: [PATCH 0795/2290] ASoC: Intel: bytcr_rt5640: Add an extra entry for the Chuwi Vi8 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f8b0127aca8c60826e7354e504a12d4a46b1c3bb ] The bios version can differ depending if it is a dual-boot variant of the tablet. Therefore another DMI match is required. Signed-off-by: Alban Boyé Reviewed-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240228192807.15130-1-alban.boye@protonmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 797d0a48d6066..094445036c20f 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -685,6 +685,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* Chuwi Vi8 dual-boot (CWI506) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "i86"), + /* The above are too generic, also match BIOS info */ + DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Chuwi Vi10 (CWI505) */ .matches = { -- GitLab From d575eb8747d6f748805187a49a614a152021728c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 5 Mar 2024 11:10:42 +0100 Subject: [PATCH 0796/2290] Input: gpio_keys_polled - suppress deferred probe error for gpio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 963465a33141d0d52338e77f80fe543d2c9dc053 ] On a PC Engines APU our admins are faced with: $ dmesg | grep -c "gpio-keys-polled gpio-keys-polled: unable to claim gpio 0, err=-517" 261 Such a message always appears when e.g. a new USB device is plugged in. Suppress this message which considerably clutters the kernel log for EPROBE_DEFER (i.e. -517). Signed-off-by: Uwe Kleine-König Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240305101042.10953-2-u.kleine-koenig@pengutronix.de Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/gpio_keys_polled.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index c3937d2fc7446..a0f9978c68f55 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -319,12 +319,10 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) error = devm_gpio_request_one(dev, button->gpio, flags, button->desc ? : DRV_NAME); - if (error) { - dev_err(dev, - "unable to claim gpio %u, err=%d\n", - button->gpio, error); - return error; - } + if (error) + return dev_err_probe(dev, error, + "unable to claim gpio %u\n", + button->gpio); bdata->gpiod = gpio_to_desc(button->gpio); if (!bdata->gpiod) { -- GitLab From f11b50b8baff81f46d21477c88e5af5912bdb847 Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 6 Mar 2024 16:14:35 +0000 Subject: [PATCH 0797/2290] ASoC: wm8962: Enable oscillator if selecting WM8962_FLL_OSC [ Upstream commit 03c7874106ca5032a312626b927b1c35f07b1f35 ] Signed-off-by: Stuart Henderson Link: https://msgid.link/r/20240306161439.1385643-1-stuarth@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b901e4c65e8a5..35d8cd4fc71a7 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2914,8 +2914,12 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s switch (fll_id) { case WM8962_FLL_MCLK: case WM8962_FLL_BCLK: + fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT; + break; case WM8962_FLL_OSC: fll1 |= (fll_id - 1) << WM8962_FLL_REFCLK_SRC_SHIFT; + snd_soc_component_update_bits(component, WM8962_PLL2, + WM8962_OSC_ENA, WM8962_OSC_ENA); break; case WM8962_FLL_INT: snd_soc_component_update_bits(component, WM8962_FLL_CONTROL_1, -- GitLab From 0a5d59ed95f1c4bbef208863c14783b1f42b7a8e Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 6 Mar 2024 16:14:36 +0000 Subject: [PATCH 0798/2290] ASoC: wm8962: Enable both SPKOUTR_ENA and SPKOUTL_ENA in mono mode [ Upstream commit 6fa849e4d78b880e878138bf238e4fd2bac3c4fa ] Signed-off-by: Stuart Henderson Link: https://msgid.link/r/20240306161439.1385643-2-stuarth@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 35d8cd4fc71a7..806b69c9b2e36 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2229,6 +2229,9 @@ SND_SOC_DAPM_PGA_E("HPOUT", SND_SOC_NOPM, 0, 0, NULL, 0, hp_event, SND_SOC_DAPM_OUTPUT("HPOUTL"), SND_SOC_DAPM_OUTPUT("HPOUTR"), + +SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0), +SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), }; static const struct snd_soc_dapm_widget wm8962_dapm_spk_mono_widgets[] = { @@ -2236,7 +2239,6 @@ SND_SOC_DAPM_MIXER("Speaker Mixer", WM8962_MIXER_ENABLES, 1, 0, spkmixl, ARRAY_SIZE(spkmixl)), SND_SOC_DAPM_MUX_E("Speaker PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux, out_pga_event, SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA("Speaker Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), SND_SOC_DAPM_OUTPUT("SPKOUT"), }; @@ -2251,9 +2253,6 @@ SND_SOC_DAPM_MUX_E("SPKOUTL PGA", WM8962_PWR_MGMT_2, 4, 0, &spkoutl_mux, SND_SOC_DAPM_MUX_E("SPKOUTR PGA", WM8962_PWR_MGMT_2, 3, 0, &spkoutr_mux, out_pga_event, SND_SOC_DAPM_POST_PMU), -SND_SOC_DAPM_PGA("SPKOUTR Output", WM8962_CLASS_D_CONTROL_1, 7, 0, NULL, 0), -SND_SOC_DAPM_PGA("SPKOUTL Output", WM8962_CLASS_D_CONTROL_1, 6, 0, NULL, 0), - SND_SOC_DAPM_OUTPUT("SPKOUTL"), SND_SOC_DAPM_OUTPUT("SPKOUTR"), }; @@ -2366,12 +2365,18 @@ static const struct snd_soc_dapm_route wm8962_spk_mono_intercon[] = { { "Speaker PGA", "Mixer", "Speaker Mixer" }, { "Speaker PGA", "DAC", "DACL" }, - { "Speaker Output", NULL, "Speaker PGA" }, - { "Speaker Output", NULL, "SYSCLK" }, - { "Speaker Output", NULL, "TOCLK" }, - { "Speaker Output", NULL, "TEMP_SPK" }, + { "SPKOUTL Output", NULL, "Speaker PGA" }, + { "SPKOUTL Output", NULL, "SYSCLK" }, + { "SPKOUTL Output", NULL, "TOCLK" }, + { "SPKOUTL Output", NULL, "TEMP_SPK" }, + + { "SPKOUTR Output", NULL, "Speaker PGA" }, + { "SPKOUTR Output", NULL, "SYSCLK" }, + { "SPKOUTR Output", NULL, "TOCLK" }, + { "SPKOUTR Output", NULL, "TEMP_SPK" }, - { "SPKOUT", NULL, "Speaker Output" }, + { "SPKOUT", NULL, "SPKOUTL Output" }, + { "SPKOUT", NULL, "SPKOUTR Output" }, }; static const struct snd_soc_dapm_route wm8962_spk_stereo_intercon[] = { -- GitLab From 807f991396648830274b644d53c9cef67e519983 Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 6 Mar 2024 16:14:39 +0000 Subject: [PATCH 0799/2290] ASoC: wm8962: Fix up incorrect error message in wm8962_set_fll [ Upstream commit 96e202f8c52ac49452f83317cf3b34cd1ad81e18 ] Use source instead of ret, which seems to be unrelated and will always be zero. Signed-off-by: Stuart Henderson Link: https://msgid.link/r/20240306161439.1385643-5-stuarth@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 806b69c9b2e36..d215e58c4a7b3 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2933,7 +2933,7 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s WM8962_FLL_FRC_NCO, WM8962_FLL_FRC_NCO); break; default: - dev_err(component->dev, "Unknown FLL source %d\n", ret); + dev_err(component->dev, "Unknown FLL source %d\n", source); return -EINVAL; } -- GitLab From e6450d5e46a737a008b4885aa223486113bf0ad6 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 19 Jan 2024 07:39:06 -0800 Subject: [PATCH 0800/2290] do_sys_name_to_handle(): use kzalloc() to fix kernel-infoleak [ Upstream commit 3948abaa4e2be938ccdfc289385a27342fb13d43 ] syzbot identified a kernel information leak vulnerability in do_sys_name_to_handle() and issued the following report [1]. [1] "BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x100 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x100 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] do_sys_name_to_handle fs/fhandle.c:73 [inline] __do_sys_name_to_handle_at fs/fhandle.c:112 [inline] __se_sys_name_to_handle_at+0x949/0xb10 fs/fhandle.c:94 __x64_sys_name_to_handle_at+0xe4/0x140 fs/fhandle.c:94 ... Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] __kmem_cache_alloc_node+0x5c9/0x970 mm/slub.c:3517 __do_kmalloc_node mm/slab_common.c:1006 [inline] __kmalloc+0x121/0x3c0 mm/slab_common.c:1020 kmalloc include/linux/slab.h:604 [inline] do_sys_name_to_handle fs/fhandle.c:39 [inline] __do_sys_name_to_handle_at fs/fhandle.c:112 [inline] __se_sys_name_to_handle_at+0x441/0xb10 fs/fhandle.c:94 __x64_sys_name_to_handle_at+0xe4/0x140 fs/fhandle.c:94 ... Bytes 18-19 of 20 are uninitialized Memory access of size 20 starts at ffff888128a46380 Data copied to user address 0000000020000240" Per Chuck Lever's suggestion, use kzalloc() instead of kmalloc() to solve the problem. Fixes: 990d6c2d7aee ("vfs: Add name to file handle conversion support") Suggested-by: Chuck Lever III Reported-and-tested-by: Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240119153906.4367-1-n.zhandarovich@fintech.ru Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/fhandle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index f2bc27d1975e1..a8c25557c8c12 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -37,7 +37,7 @@ static long do_sys_name_to_handle(const struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, GFP_KERNEL); if (!handle) return -ENOMEM; -- GitLab From 7533ed7668bc7296a5ad84e61cdf907aa8eb8fec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 2 Feb 2024 12:39:20 -0800 Subject: [PATCH 0801/2290] fs: Fix rw_hint validation [ Upstream commit ec16b147a55bfa14e858234eb7b1a7c8e7cd5021 ] Reject values that are valid rw_hints after truncation but not before truncation by passing an untruncated value to rw_hint_valid(). Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Cc: Jeff Layton Cc: Chuck Lever Cc: Jens Axboe Cc: Stephen Rothwell Fixes: 5657cb0797c4 ("fs/fcntl: use copy_to/from_user() for u64 types") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240202203926.2478590-2-bvanassche@acm.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/fcntl.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 146c9ab0cd4b7..0964e5dbf0cac 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -267,7 +267,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) } #endif -static bool rw_hint_valid(enum rw_hint hint) +static bool rw_hint_valid(u64 hint) { switch (hint) { case RWH_WRITE_LIFE_NOT_SET: @@ -287,19 +287,17 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd, { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; - enum rw_hint hint; - u64 h; + u64 hint; switch (cmd) { case F_GET_RW_HINT: - h = inode->i_write_hint; - if (copy_to_user(argp, &h, sizeof(*argp))) + hint = inode->i_write_hint; + if (copy_to_user(argp, &hint, sizeof(*argp))) return -EFAULT; return 0; case F_SET_RW_HINT: - if (copy_from_user(&h, argp, sizeof(h))) + if (copy_from_user(&hint, argp, sizeof(hint))) return -EFAULT; - hint = (enum rw_hint) h; if (!rw_hint_valid(hint)) return -EINVAL; -- GitLab From ba0e1cc43e09167f3e516b426d5648d2903b0afe Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 5 Apr 2023 16:20:12 +0200 Subject: [PATCH 0802/2290] s390/dasd: add autoquiesce feature [ Upstream commit 1cee2975bbabd89df1097c354867192106b058ea ] Add the internal logic to check for autoquiesce triggers and handle them. Quiesce and resume are functions that tell Linux to stop/resume issuing I/Os to a specific DASD. The DASD driver allows a manual quiesce/resume via ioctl. Autoquiesce will define an amount of triggers that will lead to an automatic quiesce if a certain event occurs. There is no automatic resume. All events will be reported via DASD Extended Error Reporting (EER) if configured. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Reviewed-by: Halil Pasic Link: https://lore.kernel.org/r/20230405142017.2446986-3-sth@linux.ibm.com Signed-off-by: Jens Axboe Stable-dep-of: c3116e62ddef ("s390/dasd: fix double module refcount decrement") Signed-off-by: Sasha Levin --- arch/s390/include/uapi/asm/dasd.h | 2 ++ drivers/s390/block/dasd.c | 60 ++++++++++++++++++++++--------- drivers/s390/block/dasd_eer.c | 1 + drivers/s390/block/dasd_int.h | 2 ++ 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 93d1ccd3304c7..9c49c3d67cd56 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -78,6 +78,7 @@ typedef struct dasd_information2_t { * 0x040: give access to raw eckd data * 0x080: enable discard support * 0x100: enable autodisable for IFCC errors (default) + * 0x200: enable requeue of all requests on autoquiesce */ #define DASD_FEATURE_READONLY 0x001 #define DASD_FEATURE_USEDIAG 0x002 @@ -88,6 +89,7 @@ typedef struct dasd_information2_t { #define DASD_FEATURE_USERAW 0x040 #define DASD_FEATURE_DISCARD 0x080 #define DASD_FEATURE_PATH_AUTODISABLE 0x100 +#define DASD_FEATURE_REQUEUEQUIESCE 0x200 #define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE #define DASD_PARTN_BITS 2 diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f207de4a87a0f..2f6976671496f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -73,7 +73,8 @@ static void dasd_profile_init(struct dasd_profile *, struct dentry *); static void dasd_profile_exit(struct dasd_profile *); static void dasd_hosts_init(struct dentry *, struct dasd_device *); static void dasd_hosts_exit(struct dasd_device *); - +static int dasd_handle_autoquiesce(struct dasd_device *, struct dasd_ccw_req *, + unsigned int); /* * SECTION: Operations on the device structure. */ @@ -2327,7 +2328,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) /* Non-temporary stop condition will trigger fail fast */ if (device->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(device))) { + !dasd_eer_enabled(device) && device->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; continue; @@ -2803,20 +2804,18 @@ restart: dasd_log_sense(cqr, &cqr->irb); } - /* First of all call extended error reporting. */ - if (dasd_eer_enabled(base) && - cqr->status == DASD_CQR_FAILED) { - dasd_eer_write(base, cqr, DASD_EER_FATALERROR); - - /* restart request */ + /* + * First call extended error reporting and check for autoquiesce + */ + spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); + if (cqr->status == DASD_CQR_FAILED && + dasd_handle_autoquiesce(base, cqr, DASD_EER_FATALERROR)) { cqr->status = DASD_CQR_FILLED; cqr->retries = 255; - spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); - spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), - flags); + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); goto restart; } + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); /* Process finished ERP request. */ if (cqr->refers) { @@ -2858,7 +2857,7 @@ static void __dasd_block_start_head(struct dasd_block *block) /* Non-temporary stop condition will trigger fail fast */ if (block->base->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(block->base))) { + !dasd_eer_enabled(block->base) && block->base->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; dasd_schedule_block_bh(block); @@ -3682,8 +3681,8 @@ int dasd_generic_last_path_gone(struct dasd_device *device) dev_warn(&device->cdev->dev, "No operational channel path is left " "for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last path gone"); - /* First of all call extended error reporting. */ - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); if (device->state < DASD_STATE_BASIC) return 0; @@ -3815,7 +3814,8 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) "No verified channel paths remain for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last verified path gone"); - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); } @@ -3837,7 +3837,8 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path); void dasd_generic_space_exhaust(struct dasd_device *device, struct dasd_ccw_req *cqr) { - dasd_eer_write(device, NULL, DASD_EER_NOSPC); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOSPC); if (device->state < DASD_STATE_BASIC) return; @@ -3931,6 +3932,31 @@ void dasd_schedule_requeue(struct dasd_device *device) } EXPORT_SYMBOL(dasd_schedule_requeue); +static int dasd_handle_autoquiesce(struct dasd_device *device, + struct dasd_ccw_req *cqr, + unsigned int reason) +{ + /* in any case write eer message with reason */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, cqr, reason); + + if (!test_bit(reason, &device->aq_mask)) + return 0; + + /* notify eer about autoquiesce */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); + + pr_info("%s: The DASD has been put in the quiesce state\n", + dev_name(&device->cdev->dev)); + dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); + + if (device->features & DASD_FEATURE_REQUEUEQUIESCE) + dasd_schedule_requeue(device); + + return 1; +} + static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, int rdc_buffer_size, int magic) diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index d4d31cd11d261..d16c699b9ac6d 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -387,6 +387,7 @@ void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr, break; case DASD_EER_NOPATH: case DASD_EER_NOSPC: + case DASD_EER_AUTOQUIESCE: dasd_eer_write_standard_trigger(device, NULL, id); break; case DASD_EER_STATECHANGE: diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index f50932518f83a..00bcd177264ac 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -464,6 +464,7 @@ extern struct dasd_discipline *dasd_diag_discipline_pointer; #define DASD_EER_STATECHANGE 3 #define DASD_EER_PPRCSUSPEND 4 #define DASD_EER_NOSPC 5 +#define DASD_EER_AUTOQUIESCE 31 /* DASD path handling */ @@ -641,6 +642,7 @@ struct dasd_device { struct dasd_format_entry format_entry; struct kset *paths_info; struct dasd_copy_relation *copy; + unsigned long aq_mask; }; struct dasd_block { -- GitLab From 977bb962a116fc8f2d5b633ba355cbab4f48fbf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 8 Feb 2024 17:42:48 +0100 Subject: [PATCH 0803/2290] s390/dasd: Use dev_*() for device log messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79ae56fc475869d636071f66d9e4ef2a3819eee6 ] All log messages in dasd.c use the printk variants of pr_*(). They all add the name of the affected device manually to the log message. This can be simplified by using the dev_*() variants of printk, which include the device information and make a separate call to dev_name() unnecessary. The KMSG_COMPONENT and the pr_fmt() definition can be dropped. Note that this removes the "dasd: " prefix from the one pr_info() call in dasd_init(). However, the log message already provides all relevant information. Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-10-sth@linux.ibm.com Signed-off-by: Jens Axboe Stable-dep-of: c3116e62ddef ("s390/dasd: fix double module refcount decrement") Signed-off-by: Sasha Levin --- drivers/s390/block/dasd.c | 50 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 2f6976671496f..029bb9e15ad90 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -8,9 +8,6 @@ * Copyright IBM Corp. 1999, 2009 */ -#define KMSG_COMPONENT "dasd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - #include #include #include @@ -3390,8 +3387,7 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie) ret = ccw_device_set_online(cdev); if (ret) - pr_warn("%s: Setting the DASD online failed with rc=%d\n", - dev_name(&cdev->dev), ret); + dev_warn(&cdev->dev, "Setting the DASD online failed with rc=%d\n", ret); } /* @@ -3478,8 +3474,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, { struct dasd_discipline *discipline; struct dasd_device *device; + struct device *dev; int rc; + dev = &cdev->dev; + /* first online clears initial online feature flag */ dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); device = dasd_create_device(cdev); @@ -3492,11 +3491,10 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Try to load the required module. */ rc = request_module(DASD_DIAG_MOD); if (rc) { - pr_warn("%s Setting the DASD online failed " - "because the required module %s " - "could not be loaded (rc=%d)\n", - dev_name(&cdev->dev), DASD_DIAG_MOD, - rc); + dev_warn(dev, "Setting the DASD online failed " + "because the required module %s " + "could not be loaded (rc=%d)\n", + DASD_DIAG_MOD, rc); dasd_delete_device(device); return -ENODEV; } @@ -3504,8 +3502,7 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Module init could have failed, so check again here after * request_module(). */ if (!dasd_diag_discipline_pointer) { - pr_warn("%s Setting the DASD online failed because of missing DIAG discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of missing DIAG discipline\n"); dasd_delete_device(device); return -ENODEV; } @@ -3526,8 +3523,8 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* check_device will allocate block device if necessary */ rc = discipline->check_device(device); if (rc) { - pr_warn("%s Setting the DASD online with discipline %s failed with rc=%i\n", - dev_name(&cdev->dev), discipline->name, rc); + dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", + discipline->name, rc); module_put(discipline->owner); module_put(base_discipline->owner); dasd_delete_device(device); @@ -3536,16 +3533,15 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_set_target_state(device, DASD_STATE_ONLINE); if (device->state <= DASD_STATE_KNOWN) { - pr_warn("%s Setting the DASD online failed because of a missing discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of a missing discipline\n"); rc = -ENODEV; dasd_set_target_state(device, DASD_STATE_NEW); if (device->block) dasd_free_block(device->block); dasd_delete_device(device); - } else - pr_debug("dasd_generic device %s found\n", - dev_name(&cdev->dev)); + } else { + dev_dbg(dev, "dasd_generic device found\n"); + } wait_event(dasd_init_waitq, _wait_for_device(device)); @@ -3556,10 +3552,13 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online); int dasd_generic_set_offline(struct ccw_device *cdev) { + int max_count, open_count, rc; struct dasd_device *device; struct dasd_block *block; - int max_count, open_count, rc; unsigned long flags; + struct device *dev; + + dev = &cdev->dev; rc = 0; spin_lock_irqsave(get_ccwdev_lock(cdev), flags); @@ -3580,11 +3579,10 @@ int dasd_generic_set_offline(struct ccw_device *cdev) open_count = atomic_read(&device->block->open_count); if (open_count > max_count) { if (open_count > 0) - pr_warn("%s: The DASD cannot be set offline with open count %i\n", - dev_name(&cdev->dev), open_count); + dev_warn(dev, "The DASD cannot be set offline with open count %i\n", + open_count); else - pr_warn("%s: The DASD cannot be set offline while it is in use\n", - dev_name(&cdev->dev)); + dev_warn(dev, "The DASD cannot be set offline while it is in use\n"); rc = -EBUSY; goto out_err; } @@ -3947,8 +3945,8 @@ static int dasd_handle_autoquiesce(struct dasd_device *device, if (dasd_eer_enabled(device)) dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); - pr_info("%s: The DASD has been put in the quiesce state\n", - dev_name(&device->cdev->dev)); + dev_info(&device->cdev->dev, + "The DASD has been put in the quiesce state\n"); dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); if (device->features & DASD_FEATURE_REQUEUEQUIESCE) -- GitLab From ad999aa18103fa038787b6a8a55020abcf34df1a Mon Sep 17 00:00:00 2001 From: Miroslav Franc Date: Fri, 9 Feb 2024 13:45:22 +0100 Subject: [PATCH 0804/2290] s390/dasd: fix double module refcount decrement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c3116e62ddeff79cae342147753ce596f01fcf06 ] Once the discipline is associated with the device, deleting the device takes care of decrementing the module's refcount. Doing it manually on this error path causes refcount to artificially decrease on each error while it should just stay the same. Fixes: c020d722b110 ("s390/dasd: fix panic during offline processing") Signed-off-by: Miroslav Franc Signed-off-by: Jan Höppner Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240209124522.3697827-3-sth@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/s390/block/dasd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 029bb9e15ad90..341d65acd715d 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3512,12 +3512,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_delete_device(device); return -EINVAL; } + device->base_discipline = base_discipline; if (!try_module_get(discipline->owner)) { - module_put(base_discipline->owner); dasd_delete_device(device); return -EINVAL; } - device->base_discipline = base_discipline; device->discipline = discipline; /* check_device will allocate block device if necessary */ @@ -3525,8 +3524,6 @@ int dasd_generic_set_online(struct ccw_device *cdev, if (rc) { dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", discipline->name, rc); - module_put(discipline->owner); - module_put(base_discipline->owner); dasd_delete_device(device); return rc; } -- GitLab From f8b89a36721f0ae80e2d3321368607f9e00c74ad Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:15 +0100 Subject: [PATCH 0805/2290] rcu/exp: Fix RCU expedited parallel grace period kworker allocation failure recovery [ Upstream commit a636c5e6f8fc34be520277e69c7c6ee1d4fc1d17 ] Under CONFIG_RCU_EXP_KTHREAD=y, the nodes initialization for expedited grace periods is queued to a kworker. However if the allocation of that kworker failed, the nodes initialization is performed synchronously by the caller instead. Now the check for kworker initialization failure relies on the kworker pointer to be NULL while its value might actually encapsulate an allocation failure error. Make sure to handle this case. Reviewed-by: Kalesh Singh Fixes: 9621fbee44df ("rcu: Move expedited grace period (GP) work to RT kthread_worker") Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9d7464a90f85d..c879ed0c55079 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4471,6 +4471,7 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { pr_err("Failed to create %s!\n", par_gp_kworker_name); + rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); return; } -- GitLab From 267a6af60863490b54658fc239b3e3be5a0b6f92 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:16 +0100 Subject: [PATCH 0806/2290] rcu/exp: Handle RCU expedited grace period kworker allocation failure [ Upstream commit e7539ffc9a770f36bacedcf0fbfb4bf2f244f4a5 ] Just like is done for the kworker performing nodes initialization, gracefully handle the possible allocation failure of the RCU expedited grace period main kworker. While at it perform a rename of the related checking functions to better reflect the expedited specifics. Reviewed-by: Kalesh Singh Fixes: 9621fbee44df ("rcu: Move expedited grace period (GP) work to RT kthread_worker") Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 2 ++ kernel/rcu/tree_exp.h | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c879ed0c55079..61f9503a5fe9c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4465,6 +4465,7 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { pr_err("Failed to create %s!\n", gp_kworker_name); + rcu_exp_gp_kworker = NULL; return; } @@ -4473,6 +4474,7 @@ static void __init rcu_start_exp_gp_kworkers(void) pr_err("Failed to create %s!\n", par_gp_kworker_name); rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); + rcu_exp_gp_kworker = NULL; return; } diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6d2cbed96b462..75e8d9652f7bb 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -427,7 +427,12 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_exp_gp_kworker); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_exp_par_gp_kworker); } @@ -477,7 +482,12 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_gp_wq); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_par_gp_wq); } @@ -540,7 +550,7 @@ static void sync_rcu_exp_select_cpus(void) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - if (!rcu_gp_par_worker_started() || + if (!rcu_exp_par_worker_started() || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { /* No worker started yet or last leaf, do direct call. */ @@ -910,7 +920,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) */ void synchronize_rcu_expedited(void) { - bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); + bool use_worker; unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; @@ -921,6 +931,9 @@ void synchronize_rcu_expedited(void) lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu_expedited() in RCU read-side critical section"); + use_worker = (rcu_scheduler_active != RCU_SCHEDULER_INIT) && + rcu_exp_worker_started(); + /* Is the state is such that the call is a grace period? */ if (rcu_blocking_is_gp()) { // Note well that this code runs with !PREEMPT && !SMP. @@ -950,7 +963,7 @@ void synchronize_rcu_expedited(void) return; /* Someone else did our work for us. */ /* Ensure that load happens before action based on it. */ - if (unlikely(boottime)) { + if (unlikely(!use_worker)) { /* Direct call during scheduler init and early_initcalls(). */ rcu_exp_sel_wait_wake(s); } else { @@ -968,7 +981,7 @@ void synchronize_rcu_expedited(void) /* Let the next expedited grace period start. */ mutex_unlock(&rcu_state.exp_mutex); - if (likely(!boottime)) + if (likely(use_worker)) synchronize_rcu_expedited_destroy_work(&rew); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- GitLab From 96436365e5d80d0106ea785a4f80a58e7c9edff8 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 17 Feb 2024 20:25:38 -0800 Subject: [PATCH 0807/2290] nbd: null check for nla_nest_start [ Upstream commit 31edf4bbe0ba27fd03ac7d87eb2ee3d2a231af6d ] nla_nest_start() may fail and return NULL. Insert a check and set errno based on other call sites within the same source code. Signed-off-by: Navid Emamdoost Reviewed-by: Michal Kubecek Fixes: 47d902b90a32 ("nbd: add a status netlink command") Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240218042534.it.206-kees@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9a53165de4cef..5c4be8dda253c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2408,6 +2408,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) } dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); + if (!dev_list) { + nlmsg_free(reply); + ret = -EMSGSIZE; + goto out; + } + if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) { -- GitLab From 283e38fc7dcefc6cc2272fdaddfd9f65b1cc9a0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2024 21:23:34 +0100 Subject: [PATCH 0808/2290] fs/select: rework stack allocation hack for clang [ Upstream commit ddb9fd7a544088ed70eccbb9f85e9cc9952131c1 ] A while ago, we changed the way that select() and poll() preallocate a temporary buffer just under the size of the static warning limit of 1024 bytes, as clang was frequently going slightly above that limit. The warnings have recently returned and I took another look. As it turns out, clang is not actually inherently worse at reserving stack space, it just happens to inline do_select() into core_sys_select(), while gcc never inlines it. Annotate do_select() to never be inlined and in turn remove the special case for the allocation size. This should give the same behavior for both clang and gcc all the time and once more avoids those warnings. Fixes: ad312f95d41c ("fs/select: avoid clang stack usage warning") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240216202352.2492798-1-arnd@kernel.org Reviewed-by: Kees Cook Reviewed-by: Andi Kleen Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/select.c | 2 +- include/linux/poll.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/select.c b/fs/select.c index 0ee55af1a55c2..d4d881d439dcd 100644 --- a/fs/select.c +++ b/fs/select.c @@ -476,7 +476,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in, wait->_key |= POLLOUT_SET; } -static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) +static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) { ktime_t expire, *to = NULL; struct poll_wqueues table; diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2f..d1ea4f3714a84 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC -- GitLab From d6c28aefe9b46583767b706dbf53e9bcf7552b72 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:40 +0800 Subject: [PATCH 0809/2290] md: Don't clear MD_CLOSING when the raid is about to stop [ Upstream commit 9674f54e41fffaf06f6a60202e1fa4cc13de3cf5 ] The raid should not be opened anymore when it is about to be stopped. However, other processes can open it again if the flag MD_CLOSING is cleared before exiting. From now on, this flag will not be cleared when the raid will be stopped. Fixes: 065e519e71b2 ("md: MD_CLOSING needs to be cleared after called md_set_readonly or do_md_stop") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-6-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1c87f3e708094..788acc81e7a84 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6243,7 +6243,15 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - mddev->flags = 0; + /* + * Don't clear MD_CLOSING, or mddev can be opened again. + * 'hold_active != 0' means mddev is still in the creation + * process and will be used later. + */ + if (mddev->hold_active) + mddev->flags = 0; + else + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -7571,7 +7579,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) return -ENOTTY; @@ -7658,7 +7665,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7821,7 +7827,7 @@ unlock: mddev->hold_active = 0; mddev_unlock(mddev); out: - if(did_set_md_closing) + if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY)) clear_bit(MD_CLOSING, &mddev->flags); return err; } -- GitLab From eaf5eaa4064da1c69c2eb5fb500df5e19b4bd564 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:15 +0800 Subject: [PATCH 0810/2290] lib/cmdline: Fix an invalid format specifier in an assertion msg [ Upstream commit d2733a026fc7247ba42d7a8e1b737cf14bf1df21 ] The correct format specifier for p - n (both p and n are pointers) is %td, as the type should be ptrdiff_t. This was discovered by annotating KUnit assertion macros with gcc's printf specifier, but note that gcc incorrectly suggested a %d or %ld specifier (depending on the pointer size of the architecture being built). Fixes: 0ea09083116d ("lib/cmdline: Allow get_options() to take 0 to validate the input") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Daniel Latypov Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- lib/cmdline_kunit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c index d4572dbc91453..705b82736be08 100644 --- a/lib/cmdline_kunit.c +++ b/lib/cmdline_kunit.c @@ -124,7 +124,7 @@ static void cmdline_do_one_range_test(struct kunit *test, const char *in, n, e[0], r[0]); p = memchr_inv(&r[1], 0, sizeof(r) - sizeof(r[0])); - KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %u out of bound", n, p - r); + KUNIT_EXPECT_PTR_EQ_MSG(test, p, NULL, "in test %u at %td out of bound", n, p - r); } static void cmdline_test_range(struct kunit *test) -- GitLab From 188e9aff68875f757ee1577daa30a046dcd5667b Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:16 +0800 Subject: [PATCH 0811/2290] lib: memcpy_kunit: Fix an invalid format specifier in an assertion msg [ Upstream commit 0a549ed22c3c7cc6da5c5f5918efd019944489a5 ] The 'i' passed as an assertion message is a size_t, so should use '%zu', not '%d'. This was found by annotating the _MSG() variants of KUnit's assertions to let gcc validate the format strings. Fixes: bb95ebbe89a7 ("lib: Introduce CONFIG_MEMCPY_KUNIT_TEST") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- lib/memcpy_kunit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 2b5cc70ac53fc..dbedd99aa6163 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -32,7 +32,7 @@ struct some_bytes { BUILD_BUG_ON(sizeof(instance.data) != 32); \ for (size_t i = 0; i < sizeof(instance.data); i++) { \ KUNIT_ASSERT_EQ_MSG(test, instance.data[i], v, \ - "line %d: '%s' not initialized to 0x%02x @ %d (saw 0x%02x)\n", \ + "line %d: '%s' not initialized to 0x%02x @ %zu (saw 0x%02x)\n", \ __LINE__, #instance, v, i, instance.data[i]); \ } \ } while (0) @@ -41,7 +41,7 @@ struct some_bytes { BUILD_BUG_ON(sizeof(one) != sizeof(two)); \ for (size_t i = 0; i < sizeof(one); i++) { \ KUNIT_EXPECT_EQ_MSG(test, one.data[i], two.data[i], \ - "line %d: %s.data[%d] (0x%02x) != %s.data[%d] (0x%02x)\n", \ + "line %d: %s.data[%zu] (0x%02x) != %s.data[%zu] (0x%02x)\n", \ __LINE__, #one, i, one.data[i], #two, i, two.data[i]); \ } \ kunit_info(test, "ok: " TEST_OP "() " name "\n"); \ -- GitLab From d12ffa03085e580b12a67068472134c980cd2e47 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:17 +0800 Subject: [PATCH 0812/2290] time: test: Fix incorrect format specifier [ Upstream commit 133e267ef4a26d19c93996a874714e9f3f8c70aa ] 'days' is a s64 (from div_s64), and so should use a %lld specifier. This was found by extending KUnit's assertion macros to use gcc's __printf attribute. Fixes: 276010551664 ("time: Improve performance of time64_to_tm()") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- kernel/time/time_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c index 831e8e779acef..f7c3de01197c9 100644 --- a/kernel/time/time_test.c +++ b/kernel/time/time_test.c @@ -73,7 +73,7 @@ static void time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%05ld/%02d/%02d (%2d) : %lld", \ year, month, mdday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); -- GitLab From 28e7fd1c70a203303203d1246135bcedc45b4d6c Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:18 +0800 Subject: [PATCH 0813/2290] rtc: test: Fix invalid format specifier. [ Upstream commit 8a904a3caa88118744062e872ae90f37748a8fd8 ] 'days' is a s64 (from div_s64), and so should use a %lld specifier. This was found by extending KUnit's assertion macros to use gcc's __printf attribute. Fixes: 1d1bb12a8b18 ("rtc: Improve performance of rtc_time64_to_tm(). Add tests.") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Acked-by: Alexandre Belloni Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- drivers/rtc/lib_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c index d5caf36c56cdc..225c859d6da55 100644 --- a/drivers/rtc/lib_test.c +++ b/drivers/rtc/lib_test.c @@ -54,7 +54,7 @@ static void rtc_time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \ year, month, mday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); -- GitLab From a34fba8c44ac11a354535f93f60b4db4706ca2be Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Feb 2024 14:16:47 -0700 Subject: [PATCH 0814/2290] io_uring/net: unify how recvmsg and sendmsg copy in the msghdr [ Upstream commit 52307ac4f2b507f60bae6df5be938d35e199c688 ] For recvmsg, we roll our own since we support buffer selections. This isn't the case for sendmsg right now, but in preparation for doing so, make the recvmsg copy helpers generic so we can call them from the sendmsg side as well. Signed-off-by: Jens Axboe Stable-dep-of: 8ede3db5061b ("io_uring/net: fix overflow check in io_recvmsg_mshot_prep()") Signed-off-by: Sasha Levin --- io_uring/net.c | 271 ++++++++++++++++++++++++++----------------------- 1 file changed, 142 insertions(+), 129 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index c062ce66af12c..c770961079749 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,16 +183,150 @@ static int io_setup_async_msg(struct io_kiocb *req, return -EAGAIN; } +static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) +{ + int hdr; + + if (iomsg->namelen < 0) + return true; + if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), + iomsg->namelen, &hdr)) + return true; + if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) + return true; + + return false; +} + +#ifdef CONFIG_COMPAT +static int __io_compat_msg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + struct sockaddr __user **addr, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct compat_msghdr msg; + struct compat_iovec __user *uiov; + int ret; + + if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) + return -EFAULT; + + ret = __get_compat_msghdr(&iomsg->msg, &msg, addr); + if (ret) + return ret; + + uiov = compat_ptr(msg.msg_iov); + if (req->flags & REQ_F_BUFFER_SELECT) { + compat_ssize_t clen; + + iomsg->free_iov = NULL; + if (msg.msg_iovlen == 0) { + sr->len = 0; + } else if (msg.msg_iovlen > 1) { + return -EINVAL; + } else { + if (!access_ok(uiov, sizeof(*uiov))) + return -EFAULT; + if (__get_user(clen, &uiov->iov_len)) + return -EFAULT; + if (clen < 0) + return -EINVAL; + sr->len = clen; + } + + if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { + iomsg->namelen = msg.msg_namelen; + iomsg->controllen = msg.msg_controllen; + if (io_recvmsg_multishot_overflow(iomsg)) + return -EOVERFLOW; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg.msg_iovlen, + UIO_FASTIOV, &iomsg->free_iov, + &iomsg->msg.msg_iter, true); + if (unlikely(ret < 0)) + return ret; + + return 0; +} +#endif + +static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct sockaddr __user **addr, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct user_msghdr msg; + int ret; + + if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) + return -EFAULT; + + ret = __copy_msghdr(&iomsg->msg, &msg, addr); + if (ret) + return ret; + + if (req->flags & REQ_F_BUFFER_SELECT) { + if (msg.msg_iovlen == 0) { + sr->len = iomsg->fast_iov[0].iov_len = 0; + iomsg->fast_iov[0].iov_base = NULL; + iomsg->free_iov = NULL; + } else if (msg.msg_iovlen > 1) { + return -EINVAL; + } else { + if (copy_from_user(iomsg->fast_iov, msg.msg_iov, + sizeof(*msg.msg_iov))) + return -EFAULT; + sr->len = iomsg->fast_iov[0].iov_len; + iomsg->free_iov = NULL; + } + + if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { + iomsg->namelen = msg.msg_namelen; + iomsg->controllen = msg.msg_controllen; + if (io_recvmsg_multishot_overflow(iomsg)) + return -EOVERFLOW; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, + &iomsg->free_iov, &iomsg->msg.msg_iter, false); + if (unlikely(ret < 0)) + return ret; + + return 0; +} + +static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct sockaddr __user **addr, int ddir) +{ + iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + return __io_compat_msg_copy_hdr(req, iomsg, addr, ddir); +#endif + + return __io_msg_copy_hdr(req, iomsg, addr, ddir); +} + static int io_sendmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); int ret; - iomsg->msg.msg_name = &iomsg->addr; - iomsg->free_iov = iomsg->fast_iov; - ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, - &iomsg->free_iov); + ret = io_msg_copy_hdr(req, iomsg, NULL, ITER_SOURCE); + if (ret) + return ret; + /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = iomsg->msg.msg_control_user; return ret; @@ -415,142 +549,21 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) -{ - int hdr; - - if (iomsg->namelen < 0) - return true; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), - iomsg->namelen, &hdr)) - return true; - if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) - return true; - - return false; -} - -static int __io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; - int ret; - - if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) - return -EFAULT; - - ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - if (req->flags & REQ_F_BUFFER_SELECT) { - if (msg.msg_iovlen == 0) { - sr->len = iomsg->fast_iov[0].iov_len = 0; - iomsg->fast_iov[0].iov_base = NULL; - iomsg->free_iov = NULL; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) - return -EFAULT; - sr->len = iomsg->fast_iov[0].iov_len; - iomsg->free_iov = NULL; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, - &iomsg->free_iov, &iomsg->msg.msg_iter, - false); - if (ret > 0) - ret = 0; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct compat_msghdr msg; - struct compat_iovec __user *uiov; - int ret; - - if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) - return -EFAULT; - - ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - uiov = compat_ptr(msg.msg_iov); - if (req->flags & REQ_F_BUFFER_SELECT) { - compat_ssize_t clen; - - iomsg->free_iov = NULL; - if (msg.msg_iovlen == 0) { - sr->len = 0; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (!access_ok(uiov, sizeof(*uiov))) - return -EFAULT; - if (__get_user(clen, &uiov->iov_len)) - return -EFAULT; - if (clen < 0) - return -EINVAL; - sr->len = clen; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen, - UIO_FASTIOV, &iomsg->free_iov, - &iomsg->msg.msg_iter, true); - if (ret < 0) - return ret; - } - - return 0; -} -#endif - static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { - iomsg->msg.msg_name = &iomsg->addr; - iomsg->msg.msg_iter.nr_segs = 0; - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_recvmsg_copy_hdr(req, iomsg); -#endif - - return __io_recvmsg_copy_hdr(req, iomsg); + return io_msg_copy_hdr(req, iomsg, &iomsg->uaddr, ITER_DEST); } int io_recvmsg_prep_async(struct io_kiocb *req) { + struct io_async_msghdr *iomsg; int ret; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; - ret = io_recvmsg_copy_hdr(req, req->async_data); + iomsg = req->async_data; + ret = io_recvmsg_copy_hdr(req, iomsg); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; -- GitLab From aba7b2140e82196caae71d07a1397d2d5afbffed Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Feb 2024 11:09:20 -0700 Subject: [PATCH 0815/2290] io_uring/net: move receive multishot out of the generic msghdr path [ Upstream commit c55978024d123d43808ab393a0a4ce3ce8568150 ] Move the actual user_msghdr / compat_msghdr into the send and receive sides, respectively, so we can move the uaddr receive handling into its own handler, and ditto the multishot with buffer selection logic. Signed-off-by: Jens Axboe Stable-dep-of: 8ede3db5061b ("io_uring/net: fix overflow check in io_recvmsg_mshot_prep()") Signed-off-by: Sasha Levin --- io_uring/net.c | 161 ++++++++++++++++++++++++++++--------------------- 1 file changed, 91 insertions(+), 70 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index c770961079749..b273914ed99f0 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,46 +183,26 @@ static int io_setup_async_msg(struct io_kiocb *req, return -EAGAIN; } -static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) -{ - int hdr; - - if (iomsg->namelen < 0) - return true; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), - iomsg->namelen, &hdr)) - return true; - if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) - return true; - - return false; -} - #ifdef CONFIG_COMPAT -static int __io_compat_msg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg, - struct sockaddr __user **addr, int ddir) +static int io_compat_msg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + struct compat_msghdr *msg, int ddir) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct compat_msghdr msg; struct compat_iovec __user *uiov; int ret; - if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) + if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) return -EFAULT; - ret = __get_compat_msghdr(&iomsg->msg, &msg, addr); - if (ret) - return ret; - - uiov = compat_ptr(msg.msg_iov); + uiov = compat_ptr(msg->msg_iov); if (req->flags & REQ_F_BUFFER_SELECT) { compat_ssize_t clen; iomsg->free_iov = NULL; - if (msg.msg_iovlen == 0) { + if (msg->msg_iovlen == 0) { sr->len = 0; - } else if (msg.msg_iovlen > 1) { + } else if (msg->msg_iovlen > 1) { return -EINVAL; } else { if (!access_ok(uiov, sizeof(*uiov))) @@ -234,18 +214,11 @@ static int __io_compat_msg_copy_hdr(struct io_kiocb *req, sr->len = clen; } - if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - return 0; } iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg.msg_iovlen, + ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, true); if (unlikely(ret < 0)) @@ -255,47 +228,35 @@ static int __io_compat_msg_copy_hdr(struct io_kiocb *req, } #endif -static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, - struct sockaddr __user **addr, int ddir) +static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct user_msghdr *msg, int ddir) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; int ret; - if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) + if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg))) return -EFAULT; - ret = __copy_msghdr(&iomsg->msg, &msg, addr); - if (ret) - return ret; - if (req->flags & REQ_F_BUFFER_SELECT) { - if (msg.msg_iovlen == 0) { + if (msg->msg_iovlen == 0) { sr->len = iomsg->fast_iov[0].iov_len = 0; iomsg->fast_iov[0].iov_base = NULL; iomsg->free_iov = NULL; - } else if (msg.msg_iovlen > 1) { + } else if (msg->msg_iovlen > 1) { return -EINVAL; } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, - sizeof(*msg.msg_iov))) + if (copy_from_user(iomsg->fast_iov, msg->msg_iov, + sizeof(*msg->msg_iov))) return -EFAULT; sr->len = iomsg->fast_iov[0].iov_len; iomsg->free_iov = NULL; } - if (ddir == ITER_DEST && req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - return 0; } iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ddir, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, + ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV, &iomsg->free_iov, &iomsg->msg.msg_iter, false); if (unlikely(ret < 0)) return ret; @@ -303,30 +264,34 @@ static int __io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg return 0; } -static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, - struct sockaddr __user **addr, int ddir) +static int io_sendmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct user_msghdr msg; + int ret; + iomsg->msg.msg_name = &iomsg->addr; iomsg->msg.msg_iter.nr_segs = 0; #ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_msg_copy_hdr(req, iomsg, addr, ddir); -#endif + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; - return __io_msg_copy_hdr(req, iomsg, addr, ddir); -} + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); + if (unlikely(ret)) + return ret; -static int io_sendmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - int ret; + return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + } +#endif - ret = io_msg_copy_hdr(req, iomsg, NULL, ITER_SOURCE); - if (ret) + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); + if (unlikely(ret)) return ret; + ret = __copy_msghdr(&iomsg->msg, &msg, NULL); + /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = iomsg->msg.msg_control_user; return ret; @@ -549,10 +514,66 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +static int io_recvmsg_mshot_prep(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + size_t namelen, size_t controllen) +{ + if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == + (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { + int hdr; + + if (unlikely(namelen < 0)) + return -EOVERFLOW; + if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), + namelen, &hdr)) + return -EOVERFLOW; + if (check_add_overflow(hdr, (int)controllen, &hdr)) + return -EOVERFLOW; + + iomsg->namelen = namelen; + iomsg->controllen = controllen; + return 0; + } + + return 0; +} + static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { - return io_msg_copy_hdr(req, iomsg, &iomsg->uaddr, ITER_DEST); + struct user_msghdr msg; + int ret; + + iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; + +#ifdef CONFIG_COMPAT + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; + + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); + if (unlikely(ret)) + return ret; + + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; + + return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, + cmsg.msg_controllen); + } +#endif + + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); + if (unlikely(ret)) + return ret; + + ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; + + return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, + msg.msg_controllen); } int io_recvmsg_prep_async(struct io_kiocb *req) -- GitLab From 868ec868616438df487b9e2baa5a99f8662cc47c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Mar 2024 18:29:39 +0300 Subject: [PATCH 0816/2290] io_uring/net: fix overflow check in io_recvmsg_mshot_prep() [ Upstream commit 8ede3db5061bb1fe28e2c9683329aafa89d2b1b4 ] The "controllen" variable is type size_t (unsigned long). Casting it to int could lead to an integer underflow. The check_add_overflow() function considers the type of the destination which is type int. If we add two positive values and the result cannot fit in an integer then that's counted as an overflow. However, if we cast "controllen" to an int and it turns negative, then negative values *can* fit into an int type so there is no overflow. Good: 100 + (unsigned long)-4 = 96 <-- overflow Bad: 100 + (int)-4 = 96 <-- no overflow I deleted the cast of the sizeof() as well. That's not a bug but the cast is unnecessary. Fixes: 9b0fc3c054ff ("io_uring: fix types in io_recvmsg_multishot_overflow") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/138bd2e2-ede8-4bcc-aa7b-f3d9de167a37@moroto.mountain Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index b273914ed99f0..9fc0ffb0b6c12 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -524,10 +524,10 @@ static int io_recvmsg_mshot_prep(struct io_kiocb *req, if (unlikely(namelen < 0)) return -EOVERFLOW; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), + if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), namelen, &hdr)) return -EOVERFLOW; - if (check_add_overflow(hdr, (int)controllen, &hdr)) + if (check_add_overflow(hdr, controllen, &hdr)) return -EOVERFLOW; iomsg->namelen = namelen; -- GitLab From 74ca3ef68d2f449bc848c0a814cefc487bf755fa Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Tue, 5 Mar 2024 16:20:48 +0800 Subject: [PATCH 0817/2290] aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts [ Upstream commit f98364e926626c678fb4b9004b75cacf92ff0662 ] This patch is against CVE-2023-6270. The description of cve is: A flaw was found in the ATA over Ethernet (AoE) driver in the Linux kernel. The aoecmd_cfg_pkts() function improperly updates the refcnt on `struct net_device`, and a use-after-free can be triggered by racing between the free on the struct and the access through the `skbtxq` global queue. This could lead to a denial of service condition or potential code execution. In aoecmd_cfg_pkts(), it always calls dev_put(ifp) when skb initial code is finished. But the net_device ifp will still be used in later tx()->dev_queue_xmit() in kthread. Which means that the dev_put(ifp) should NOT be called in the success path of skb initial code in aoecmd_cfg_pkts(). Otherwise tx() may run into use-after-free because the net_device is freed. This patch removed the dev_put(ifp) in the success path in aoecmd_cfg_pkts(), and added dev_put() after skb xmit in tx(). Link: https://nvd.nist.gov/vuln/detail/CVE-2023-6270 Fixes: 7562f876cd93 ("[NET]: Rework dev_base via list_head (v3)") Signed-off-by: Chun-Yi Lee Link: https://lore.kernel.org/r/20240305082048.25526-1-jlee@suse.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/aoe/aoecmd.c | 12 ++++++------ drivers/block/aoe/aoenet.c | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d7317425be510..cc9077b588d7e 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu rcu_read_lock(); for_each_netdev_rcu(&init_net, ifp) { dev_hold(ifp); - if (!is_aoe_netif(ifp)) - goto cont; + if (!is_aoe_netif(ifp)) { + dev_put(ifp); + continue; + } skb = new_skb(sizeof *h + sizeof *ch); if (skb == NULL) { printk(KERN_INFO "aoe: skb alloc failure\n"); - goto cont; + dev_put(ifp); + continue; } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; @@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu h->major = cpu_to_be16(aoemajor); h->minor = aoeminor; h->cmd = AOECMD_CFG; - -cont: - dev_put(ifp); } rcu_read_unlock(); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 63773a90581dd..1e66c7a188a12 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -64,6 +64,7 @@ tx(int id) __must_hold(&txlock) pr_warn("aoe: packet could not be sent on %s. %s\n", ifp ? ifp->name : "netif", "consider increasing tx_queue_len"); + dev_put(ifp); spin_lock_irq(&txlock); } return 0; -- GitLab From f0439b7d67b827615a52dad23a2289658a65b9a3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 22 Jan 2024 10:08:07 -0800 Subject: [PATCH 0818/2290] x86/resctrl: Implement new mba_MBps throttling heuristic [ Upstream commit c2427e70c1630d98966375fffc2b713ab9768a94 ] The mba_MBps feedback loop increases throttling when a group is using more bandwidth than the target set by the user in the schemata file, and decreases throttling when below target. To avoid possibly stepping throttling up and down on every poll a flag "delta_comp" is set whenever throttling is changed to indicate that the actual change in bandwidth should be recorded on the next poll in "delta_bw". Throttling is only reduced if the current bandwidth plus delta_bw is below the user target. This algorithm works well if the workload has steady bandwidth needs. But it can go badly wrong if the workload moves to a different phase just as the throttling level changed. E.g. if the workload becomes essentially idle right as throttling level is increased, the value calculated for delta_bw will be more or less the old bandwidth level. If the workload then resumes, Linux may never reduce throttling because current bandwidth plus delta_bw is above the target set by the user. Implement a simpler heuristic by assuming that in the worst case the currently measured bandwidth is being controlled by the current level of throttling. Compute how much it may increase if throttling is relaxed to the next higher level. If that is still below the user target, then it is ok to reduce the amount of throttling. Fixes: ba0f26d8529c ("x86/intel_rdt/mba_sc: Prepare for feedback loop") Reported-by: Xiaochen Shen Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Xiaochen Shen Link: https://lore.kernel.org/r/20240122180807.70518-1-tony.luck@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/resctrl/internal.h | 4 --- arch/x86/kernel/cpu/resctrl/monitor.c | 42 ++++++-------------------- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 0b5c6c76f6f7b..4761d489a117a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -281,14 +281,10 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @prev_bw_bytes: Previous bytes value read for bandwidth calculation * @prev_bw: The most recent bandwidth in MBps - * @delta_bw: Difference between the current and previous bandwidth - * @delta_comp: Indicates whether to compute the delta_bw */ struct mbm_state { u64 prev_bw_bytes; u32 prev_bw; - u32 delta_bw; - bool delta_comp; }; /** diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 77538abeb72af..b9adb707750c6 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -428,9 +428,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) cur_bw = bytes / SZ_1M; - if (m->delta_comp) - m->delta_bw = abs(cur_bw - m->prev_bw); - m->delta_comp = false; m->prev_bw = cur_bw; } @@ -508,11 +505,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; - u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; struct list_head *head; struct rdtgroup *entry; + u32 cur_bw, user_bw; if (!is_mbm_local_enabled()) return; @@ -531,7 +528,6 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) cur_bw = pmbm_data->prev_bw; user_bw = dom_mba->mbps_val[closid]; - delta_bw = pmbm_data->delta_bw; /* MBA resource doesn't support CDP */ cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); @@ -543,49 +539,31 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) list_for_each_entry(entry, head, mon.crdtgrp_list) { cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; cur_bw += cmbm_data->prev_bw; - delta_bw += cmbm_data->delta_bw; } /* * Scale up/down the bandwidth linearly for the ctrl group. The * bandwidth step is the bandwidth granularity specified by the * hardware. - * - * The delta_bw is used when increasing the bandwidth so that we - * dont alternately increase and decrease the control values - * continuously. - * - * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if - * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep - * switching between 90 and 110 continuously if we only check - * cur_bw < user_bw. + * Always increase throttling if current bandwidth is above the + * target set by user. + * But avoid thrashing up and down on every poll by checking + * whether a decrease in throttling is likely to push the group + * back over target. E.g. if currently throttling to 30% of bandwidth + * on a system with 10% granularity steps, check whether moving to + * 40% would go past the limit by multiplying current bandwidth by + * "(30 + 10) / 30". */ if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { new_msr_val = cur_msr_val - r_mba->membw.bw_gran; } else if (cur_msr_val < MAX_MBA_BW && - (user_bw > (cur_bw + delta_bw))) { + (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) { new_msr_val = cur_msr_val + r_mba->membw.bw_gran; } else { return; } resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); - - /* - * Delta values are updated dynamically package wise for each - * rdtgrp every time the throttle MSR changes value. - * - * This is because (1)the increase in bandwidth is not perfectly - * linear and only "approximately" linear even when the hardware - * says it is linear.(2)Also since MBA is a core specific - * mechanism, the delta values vary based on number of cores used - * by the rdtgrp. - */ - pmbm_data->delta_comp = true; - list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; - cmbm_data->delta_comp = true; - } } static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) -- GitLab From 3e5e8248c0a4cd9d6e75208b21ff31ce5265e906 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Jan 2024 17:39:19 +0100 Subject: [PATCH 0819/2290] x86/sme: Fix memory encryption setting if enabled by default and not overridden [ Upstream commit e814b59e6c2b11f5a3d007b2e61f7d550c354c3a ] Commit cbebd68f59f0 ("x86/mm: Fix use of uninitialized buffer in sme_enable()") 'fixed' an issue in sme_enable() detected by static analysis, and broke the common case in the process. cmdline_find_option() will return < 0 on an error, or when the command line argument does not appear at all. In this particular case, the latter is not an error condition, and so the early exit is wrong. Instead, without mem_encrypt= on the command line, the compile time default should be honoured, which could be to enable memory encryption, and this is currently broken. Fix it by setting sme_me_mask to a preliminary value based on the compile time default, and only omitting the command line argument test when cmdline_find_option() returns an error. [ bp: Drop active_by_default while at it. ] Fixes: cbebd68f59f0 ("x86/mm: Fix use of uninitialized buffer in sme_enable()") Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240126163918.2908990-2-ardb+git@google.com Signed-off-by: Sasha Levin --- arch/x86/mm/mem_encrypt_identity.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index d94ebd8acdfde..a11a6ebbf5ecf 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -507,7 +507,6 @@ void __init sme_enable(struct boot_params *bp) const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; - bool active_by_default; unsigned long me_mask; char buffer[16]; bool snp; @@ -593,22 +592,19 @@ void __init sme_enable(struct boot_params *bp) : "p" (sme_cmdline_off)); if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) - active_by_default = true; - else - active_by_default = false; + sme_me_mask = me_mask; cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) - return; + goto out; if (!strncmp(buffer, cmdline_on, sizeof(buffer))) sme_me_mask = me_mask; else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) sme_me_mask = 0; - else - sme_me_mask = active_by_default ? me_mask : 0; + out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; -- GitLab From 081bf64a7e9ad999a833bfb4d5412685f8818e9a Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Mon, 18 Dec 2023 08:38:39 +0100 Subject: [PATCH 0820/2290] timekeeping: Fix cross-timestamp interpolation on counter wrap [ Upstream commit 84dccadd3e2a3f1a373826ad71e5ced5e76b0c00 ] cycle_between() decides whether get_device_system_crosststamp() will interpolate for older counter readings. cycle_between() yields wrong results for a counter wrap-around where after < before < test, and for the case after < test < before. Fix the comparison logic. Fixes: 2c756feb18d9 ("time: Add history to cross timestamp interface supporting slower devices") Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/20231218073849.35294-2-peter.hilber@opensynergy.com Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 221c8c404973a..c168931c78e01 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1186,7 +1186,7 @@ static bool cycle_between(u64 before, u64 test, u64 after) { if (test > before && test < after) return true; - if (test < before && before > after) + if (before > after && (test > before || test < after)) return true; return false; } -- GitLab From 8a1d2ecd9bb816f515f069e9cc0a04fddf2a8872 Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Mon, 18 Dec 2023 08:38:40 +0100 Subject: [PATCH 0821/2290] timekeeping: Fix cross-timestamp interpolation corner case decision [ Upstream commit 87a41130881995f82f7adbafbfeddaebfb35f0ef ] The cycle_between() helper checks if parameter test is in the open interval (before, after). Colloquially speaking, this also applies to the counter wrap-around special case before > after. get_device_system_crosststamp() currently uses cycle_between() at the first call site to decide whether to interpolate for older counter readings. get_device_system_crosststamp() has the following problem with cycle_between() testing against an open interval: Assume that, by chance, cycles == tk->tkr_mono.cycle_last (in the following, "cycle_last" for brevity). Then, cycle_between() at the first call site, with effective argument values cycle_between(cycle_last, cycles, now), returns false, enabling interpolation. During interpolation, get_device_system_crosststamp() will then call cycle_between() at the second call site (if a history_begin was supplied). The effective argument values are cycle_between(history_begin->cycles, cycles, cycles), since system_counterval.cycles == interval_start == cycles, per the assumption. Due to the test against the open interval, cycle_between() returns false again. This causes get_device_system_crosststamp() to return -EINVAL. This failure should be avoided, since get_device_system_crosststamp() works both when cycles follows cycle_last (no interpolation), and when cycles precedes cycle_last (interpolation). For the case cycles == cycle_last, interpolation is actually unneeded. Fix this by changing cycle_between() into timestamp_in_interval(), which now checks against the closed interval, rather than the open interval. This changes the get_device_system_crosststamp() behavior for three corner cases: 1. Bypass interpolation in the case cycles == tk->tkr_mono.cycle_last, fixing the problem described above. 2. At the first timestamp_in_interval() call site, cycles == now no longer causes failure. 3. At the second timestamp_in_interval() call site, history_begin->cycles == system_counterval.cycles no longer causes failure. adjust_historical_crosststamp() also works for this corner case, where partial_history_cycles == total_history_cycles. These behavioral changes should not cause any problems. Fixes: 2c756feb18d9 ("time: Add history to cross timestamp interface supporting slower devices") Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231218073849.35294-3-peter.hilber@opensynergy.com Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c168931c78e01..1749a712f72d1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1180,13 +1180,15 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, } /* - * cycle_between - true if test occurs chronologically between before and after + * timestamp_in_interval - true if ts is chronologically in [start, end] + * + * True if ts occurs chronologically at or after start, and before or at end. */ -static bool cycle_between(u64 before, u64 test, u64 after) +static bool timestamp_in_interval(u64 start, u64 end, u64 ts) { - if (test > before && test < after) + if (ts >= start && ts <= end) return true; - if (before > after && (test > before || test < after)) + if (start > end && (ts >= start || ts <= end)) return true; return false; } @@ -1246,7 +1248,7 @@ int get_device_system_crosststamp(int (*get_time_fn) */ now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; - if (!cycle_between(interval_start, cycles, now)) { + if (!timestamp_in_interval(interval_start, now, cycles)) { clock_was_set_seq = tk->clock_was_set_seq; cs_was_changed_seq = tk->cs_was_changed_seq; cycles = interval_start; @@ -1277,13 +1279,13 @@ int get_device_system_crosststamp(int (*get_time_fn) bool discontinuity; /* - * Check that the counter value occurs after the provided + * Check that the counter value is not before the provided * history reference and that the history doesn't cross a * clocksource change */ if (!history_begin || - !cycle_between(history_begin->cycles, - system_counterval.cycles, cycles) || + !timestamp_in_interval(history_begin->cycles, + cycles, system_counterval.cycles) || history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL; partial_history_cycles = cycles - system_counterval.cycles; -- GitLab From 9388721260f6e54ebc2e1e65b64f52c072ed7f83 Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Mon, 18 Dec 2023 08:38:41 +0100 Subject: [PATCH 0822/2290] timekeeping: Fix cross-timestamp interpolation for non-x86 [ Upstream commit 14274d0bd31b4debf28284604589f596ad2e99f2 ] So far, get_device_system_crosststamp() unconditionally passes system_counterval.cycles to timekeeping_cycles_to_ns(). But when interpolating system time (do_interp == true), system_counterval.cycles is before tkr_mono.cycle_last, contrary to the timekeeping_cycles_to_ns() expectations. On x86, CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE will mitigate on interpolating, setting delta to 0. With delta == 0, xtstamp->sys_monoraw and xtstamp->sys_realtime are then set to the last update time, as implicitly expected by adjust_historical_crosststamp(). On other architectures, the resulting nonsense xtstamp->sys_monoraw and xtstamp->sys_realtime corrupt the xtstamp (ts) adjustment in adjust_historical_crosststamp(). Fix this by deriving xtstamp->sys_monoraw and xtstamp->sys_realtime from the last update time when interpolating, by using the local variable "cycles". The local variable already has the right value when interpolating, unlike system_counterval.cycles. Fixes: 2c756feb18d9 ("time: Add history to cross timestamp interface supporting slower devices") Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/20231218073849.35294-4-peter.hilber@opensynergy.com Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1749a712f72d1..b158cbef4d8dc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1261,10 +1261,8 @@ int get_device_system_crosststamp(int (*get_time_fn) tk_core.timekeeper.offs_real); base_raw = tk->tkr_raw.base; - nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, - system_counterval.cycles); - nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, - system_counterval.cycles); + nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); + nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); } while (read_seqcount_retry(&tk_core.seq, seq)); xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); -- GitLab From 790ae577eff350695ba59eb7eafcce415d0a702b Mon Sep 17 00:00:00 2001 From: Keisuke Nishimura Date: Wed, 10 Jan 2024 14:17:06 +0100 Subject: [PATCH 0823/2290] sched/fair: Take the scheduling domain into account in select_idle_smt() [ Upstream commit 8aeaffef8c6eceab0e1498486fdd4f3dc3b7066c ] When picking a CPU on task wakeup, select_idle_smt() has to take into account the scheduling domain of @target. This is because the "isolcpus" kernel command line option can remove CPUs from the domain to isolate them from other SMT siblings. This fix checks if the candidate CPU is in the target scheduling domain. Commit: df3cb4ea1fb6 ("sched/fair: Fix wrong cpu selecting from isolated domain") ... originally introduced this fix by adding the check of the scheduling domain in the loop. However, commit: 3e6efe87cd5cc ("sched/fair: Remove redundant check in select_idle_smt()") ... accidentally removed the check. Bring it back. Fixes: 3e6efe87cd5c ("sched/fair: Remove redundant check in select_idle_smt()") Signed-off-by: Keisuke Nishimura Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240110131707.437301-1-keisuke.nishimura@inria.fr Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2558ab9033bee..1c4e54fffb8b6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6678,13 +6678,19 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu /* * Scan the local SMT mask for idle CPUs. */ -static int select_idle_smt(struct task_struct *p, int target) +static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { int cpu; for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { if (cpu == target) continue; + /* + * Check if the CPU is in the LLC scheduling domain of @target. + * Due to isolcpus, there is no guarantee that all the siblings are in the domain. + */ + if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) + continue; if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) return cpu; } @@ -6708,7 +6714,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma return __select_idle_cpu(core, p); } -static inline int select_idle_smt(struct task_struct *p, int target) +static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { return -1; } @@ -6970,7 +6976,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) has_idle_core = test_idle_cores(target); if (!has_idle_core && cpus_share_cache(prev, target)) { - i = select_idle_smt(p, prev); + i = select_idle_smt(p, sd, prev); if ((unsigned int)i < nr_cpumask_bits) return i; } -- GitLab From 13fa3326efb257f0fa6aaf17ef509de79e47e301 Mon Sep 17 00:00:00 2001 From: Keisuke Nishimura Date: Wed, 10 Jan 2024 14:17:07 +0100 Subject: [PATCH 0824/2290] sched/fair: Take the scheduling domain into account in select_idle_core() [ Upstream commit 23d04d8c6b8ec339057264659b7834027f3e6a63 ] When picking a CPU on task wakeup, select_idle_core() has to take into account the scheduling domain where the function looks for the CPU. This is because the "isolcpus" kernel command line option can remove CPUs from the domain to isolate them from other SMT siblings. This change replaces the set of CPUs allowed to run the task from p->cpus_ptr by the intersection of p->cpus_ptr and sched_domain_span(sd) which is stored in the 'cpus' argument provided by select_idle_cpu(). Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()") Signed-off-by: Keisuke Nishimura Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240110131707.437301-2-keisuke.nishimura@inria.fr Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c4e54fffb8b6..91c101ecfef9f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6656,7 +6656,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu if (!available_idle_cpu(cpu)) { idle = false; if (*idle_cpu == -1) { - if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) { *idle_cpu = cpu; break; } @@ -6664,7 +6664,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu } break; } - if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr)) + if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus)) *idle_cpu = cpu; } -- GitLab From 90f089d77e38db1c48629f111f3c8c336be1bc38 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Sun, 17 Dec 2023 13:29:01 +0200 Subject: [PATCH 0825/2290] wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() [ Upstream commit ad25ee36f00172f7d53242dc77c69fff7ced0755 ] We should check whether the WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT tlv is present before accessing it, otherwise a null pointer deference error will occur. Fixes: dc405152bb64 ("ath10k: handle mgmt tx completion event") Signed-off-by: Xingyuan Mo Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20231208043433.271449-1-hdthky0@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 876410a47d1d2..4d5009604eee7 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -844,6 +844,10 @@ ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev(struct ath10k *ar, struct sk_buff *skb, } ev = tb[WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT]; + if (!ev) { + kfree(tb); + return -EPROTO; + } arg->desc_id = ev->desc_id; arg->status = ev->status; -- GitLab From c67698325c68f8768db858f5c87c34823421746d Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:33 +0000 Subject: [PATCH 0826/2290] wifi: b43: Stop/wake correct queue in DMA Tx path when QoS is disabled [ Upstream commit 9636951e4468f02c72cc75a82dc65d003077edbc ] When QoS is disabled, the queue priority value will not map to the correct ieee80211 queue since there is only one queue. Stop/wake queue 0 when QoS is disabled to prevent trying to stop/wake a non-existent queue and failing to stop/wake the actual queue instantiated. Log of issue before change (with kernel parameter qos=0): [ +5.112651] ------------[ cut here ]------------ [ +0.000005] WARNING: CPU: 7 PID: 25513 at net/mac80211/util.c:449 __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000067] Modules linked in: b43(O) snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_chain_nat xt_MASQUERADE nf_nat xfrm_user xfrm_algo xt_addrtype overlay ccm af_packet amdgpu snd_hda_codec_cirrus snd_hda_codec_generic ledtrig_audio drm_exec amdxcp gpu_sched xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6t_rpfilter ipt_rpfilter xt_pkttype xt_LOG nf_log_syslog xt_tcpudp nft_compat nf_tables nfnetlink sch_fq_codel btusb uinput iTCO_wdt ctr btrtl intel_pmc_bxt i915 intel_rapl_msr mei_hdcp mei_pxp joydev at24 watchdog btintel atkbd libps2 serio radeon btbcm vivaldi_fmap btmtk intel_rapl_common snd_hda_codec_hdmi bluetooth uvcvideo nls_iso8859_1 applesmc nls_cp437 x86_pkg_temp_thermal snd_hda_intel intel_powerclamp vfat videobuf2_vmalloc coretemp fat snd_intel_dspcfg crc32_pclmul uvc polyval_clmulni snd_intel_sdw_acpi loop videobuf2_memops snd_hda_codec tun drm_suballoc_helper polyval_generic drm_ttm_helper drm_buddy tap ecdh_generic videobuf2_v4l2 gf128mul macvlan ttm ghash_clmulni_intel ecc tg3 [ +0.000044] videodev bridge snd_hda_core rapl crc16 drm_display_helper cec mousedev snd_hwdep evdev intel_cstate bcm5974 hid_appleir videobuf2_common stp mac_hid libphy snd_pcm drm_kms_helper acpi_als mei_me intel_uncore llc mc snd_timer intel_gtt industrialio_triggered_buffer apple_mfi_fastcharge i2c_i801 mei snd lpc_ich agpgart ptp i2c_smbus thunderbolt apple_gmux i2c_algo_bit kfifo_buf video industrialio soundcore pps_core wmi tiny_power_button sbs sbshc button ac cordic bcma mac80211 cfg80211 ssb rfkill libarc4 kvm_intel kvm drm irqbypass fuse backlight firmware_class efi_pstore configfs efivarfs dmi_sysfs ip_tables x_tables autofs4 dm_crypt cbc encrypted_keys trusted asn1_encoder tee tpm rng_core input_leds hid_apple led_class hid_generic usbhid hid sd_mod t10_pi crc64_rocksoft crc64 crc_t10dif crct10dif_generic ahci libahci libata uhci_hcd ehci_pci ehci_hcd crct10dif_pclmul crct10dif_common sha512_ssse3 sha512_generic sha256_ssse3 sha1_ssse3 aesni_intel usbcore scsi_mod libaes crypto_simd cryptd scsi_common [ +0.000055] usb_common rtc_cmos btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq dm_snapshot dm_bufio dm_mod dax [last unloaded: b43(O)] [ +0.000009] CPU: 7 PID: 25513 Comm: irq/17-b43 Tainted: G W O 6.6.7 #1-NixOS [ +0.000003] Hardware name: Apple Inc. MacBookPro8,3/Mac-942459F5819B171B, BIOS 87.0.0.0.0 06/13/2019 [ +0.000001] RIP: 0010:__ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000046] Code: 00 45 85 e4 0f 85 9b 00 00 00 48 8d bd 40 09 00 00 f0 48 0f ba ad 48 09 00 00 00 72 0f 5b 5d 41 5c 41 5d 41 5e e9 cb 6d 3c d0 <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc 48 8d b4 16 94 00 00 [ +0.000002] RSP: 0018:ffffc90003c77d60 EFLAGS: 00010097 [ +0.000001] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 0000000000000000 [ +0.000001] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff88820b924900 [ +0.000002] RBP: ffff88820b924900 R08: ffffc90003c77d90 R09: 000000000003bfd0 [ +0.000001] R10: ffff88820b924900 R11: ffffc90003c77c68 R12: 0000000000000000 [ +0.000001] R13: 0000000000000000 R14: ffffc90003c77d90 R15: ffffffffc0fa6f40 [ +0.000001] FS: 0000000000000000(0000) GS:ffff88846fb80000(0000) knlGS:0000000000000000 [ +0.000001] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007fafda7ae008 CR3: 000000046d220005 CR4: 00000000000606e0 [ +0.000002] Call Trace: [ +0.000003] [ +0.000001] ? __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000044] ? __warn+0x81/0x130 [ +0.000005] ? __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000045] ? report_bug+0x171/0x1a0 [ +0.000004] ? handle_bug+0x41/0x70 [ +0.000004] ? exc_invalid_op+0x17/0x70 [ +0.000003] ? asm_exc_invalid_op+0x1a/0x20 [ +0.000005] ? __ieee80211_wake_queue+0xd5/0x180 [mac80211] [ +0.000043] ieee80211_wake_queue+0x4a/0x80 [mac80211] [ +0.000044] b43_dma_handle_txstatus+0x29c/0x3a0 [b43] [ +0.000016] ? __pfx_irq_thread_fn+0x10/0x10 [ +0.000002] b43_handle_txstatus+0x61/0x80 [b43] [ +0.000012] b43_interrupt_thread_handler+0x3f9/0x6b0 [b43] [ +0.000011] irq_thread_fn+0x23/0x60 [ +0.000002] irq_thread+0xfe/0x1c0 [ +0.000002] ? __pfx_irq_thread_dtor+0x10/0x10 [ +0.000001] ? __pfx_irq_thread+0x10/0x10 [ +0.000001] kthread+0xe8/0x120 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000003] ret_from_fork+0x34/0x50 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000002] ret_from_fork_asm+0x1b/0x30 [ +0.000004] [ +0.000001] ---[ end trace 0000000000000000 ]--- [ +0.000065] ------------[ cut here ]------------ [ +0.000001] WARNING: CPU: 0 PID: 56077 at net/mac80211/util.c:514 __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000077] Modules linked in: b43(O) snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_chain_nat xt_MASQUERADE nf_nat xfrm_user xfrm_algo xt_addrtype overlay ccm af_packet amdgpu snd_hda_codec_cirrus snd_hda_codec_generic ledtrig_audio drm_exec amdxcp gpu_sched xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6t_rpfilter ipt_rpfilter xt_pkttype xt_LOG nf_log_syslog xt_tcpudp nft_compat nf_tables nfnetlink sch_fq_codel btusb uinput iTCO_wdt ctr btrtl intel_pmc_bxt i915 intel_rapl_msr mei_hdcp mei_pxp joydev at24 watchdog btintel atkbd libps2 serio radeon btbcm vivaldi_fmap btmtk intel_rapl_common snd_hda_codec_hdmi bluetooth uvcvideo nls_iso8859_1 applesmc nls_cp437 x86_pkg_temp_thermal snd_hda_intel intel_powerclamp vfat videobuf2_vmalloc coretemp fat snd_intel_dspcfg crc32_pclmul uvc polyval_clmulni snd_intel_sdw_acpi loop videobuf2_memops snd_hda_codec tun drm_suballoc_helper polyval_generic drm_ttm_helper drm_buddy tap ecdh_generic videobuf2_v4l2 gf128mul macvlan ttm ghash_clmulni_intel ecc tg3 [ +0.000073] videodev bridge snd_hda_core rapl crc16 drm_display_helper cec mousedev snd_hwdep evdev intel_cstate bcm5974 hid_appleir videobuf2_common stp mac_hid libphy snd_pcm drm_kms_helper acpi_als mei_me intel_uncore llc mc snd_timer intel_gtt industrialio_triggered_buffer apple_mfi_fastcharge i2c_i801 mei snd lpc_ich agpgart ptp i2c_smbus thunderbolt apple_gmux i2c_algo_bit kfifo_buf video industrialio soundcore pps_core wmi tiny_power_button sbs sbshc button ac cordic bcma mac80211 cfg80211 ssb rfkill libarc4 kvm_intel kvm drm irqbypass fuse backlight firmware_class efi_pstore configfs efivarfs dmi_sysfs ip_tables x_tables autofs4 dm_crypt cbc encrypted_keys trusted asn1_encoder tee tpm rng_core input_leds hid_apple led_class hid_generic usbhid hid sd_mod t10_pi crc64_rocksoft crc64 crc_t10dif crct10dif_generic ahci libahci libata uhci_hcd ehci_pci ehci_hcd crct10dif_pclmul crct10dif_common sha512_ssse3 sha512_generic sha256_ssse3 sha1_ssse3 aesni_intel usbcore scsi_mod libaes crypto_simd cryptd scsi_common [ +0.000084] usb_common rtc_cmos btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq dm_snapshot dm_bufio dm_mod dax [last unloaded: b43] [ +0.000012] CPU: 0 PID: 56077 Comm: kworker/u16:17 Tainted: G W O 6.6.7 #1-NixOS [ +0.000003] Hardware name: Apple Inc. MacBookPro8,3/Mac-942459F5819B171B, BIOS 87.0.0.0.0 06/13/2019 [ +0.000001] Workqueue: phy7 b43_tx_work [b43] [ +0.000019] RIP: 0010:__ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000076] Code: 74 11 48 8b 78 08 0f b7 d6 89 e9 4c 89 e6 e8 ab f4 00 00 65 ff 0d 9c b7 34 3f 0f 85 55 ff ff ff 0f 1f 44 00 00 e9 4b ff ff ff <0f> 0b 5b 5d 41 5c 41 5d c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 [ +0.000002] RSP: 0000:ffffc90004157d50 EFLAGS: 00010097 [ +0.000002] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 0000000000000000 [ +0.000002] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff8882d65d0900 [ +0.000002] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 [ +0.000001] R10: 00000000000000ff R11: ffff88814d0155a0 R12: ffff8882d65d0900 [ +0.000002] R13: 0000000000000000 R14: ffff8881002d2800 R15: 00000000000000d0 [ +0.000002] FS: 0000000000000000(0000) GS:ffff88846f800000(0000) knlGS:0000000000000000 [ +0.000003] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000002] CR2: 00007f2e8c10c880 CR3: 0000000385b66005 CR4: 00000000000606f0 [ +0.000002] Call Trace: [ +0.000001] [ +0.000001] ? __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000075] ? __warn+0x81/0x130 [ +0.000004] ? __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000075] ? report_bug+0x171/0x1a0 [ +0.000005] ? handle_bug+0x41/0x70 [ +0.000003] ? exc_invalid_op+0x17/0x70 [ +0.000004] ? asm_exc_invalid_op+0x1a/0x20 [ +0.000004] ? __ieee80211_stop_queue+0xcc/0xe0 [mac80211] [ +0.000076] ieee80211_stop_queue+0x36/0x50 [mac80211] [ +0.000077] b43_dma_tx+0x550/0x780 [b43] [ +0.000023] b43_tx_work+0x90/0x130 [b43] [ +0.000018] process_one_work+0x174/0x340 [ +0.000003] worker_thread+0x27b/0x3a0 [ +0.000004] ? __pfx_worker_thread+0x10/0x10 [ +0.000002] kthread+0xe8/0x120 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000004] ret_from_fork+0x34/0x50 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000003] ret_from_fork_asm+0x1b/0x30 [ +0.000006] [ +0.000001] ---[ end trace 0000000000000000 ]--- Fixes: e6f5b934fba8 ("b43: Add QOS support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-2-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/b43.h | 16 ++++++++++++++++ drivers/net/wireless/broadcom/b43/dma.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/b43.h b/drivers/net/wireless/broadcom/b43/b43.h index 67b4bac048e58..c0d8fc0b22fb2 100644 --- a/drivers/net/wireless/broadcom/b43/b43.h +++ b/drivers/net/wireless/broadcom/b43/b43.h @@ -1082,6 +1082,22 @@ static inline bool b43_using_pio_transfers(struct b43_wldev *dev) return dev->__using_pio_transfers; } +static inline void b43_wake_queue(struct b43_wldev *dev, int queue_prio) +{ + if (dev->qos_enabled) + ieee80211_wake_queue(dev->wl->hw, queue_prio); + else + ieee80211_wake_queue(dev->wl->hw, 0); +} + +static inline void b43_stop_queue(struct b43_wldev *dev, int queue_prio) +{ + if (dev->qos_enabled) + ieee80211_stop_queue(dev->wl->hw, queue_prio); + else + ieee80211_stop_queue(dev->wl->hw, 0); +} + /* Message printing */ __printf(2, 3) void b43info(struct b43_wl *wl, const char *fmt, ...); __printf(2, 3) void b43err(struct b43_wl *wl, const char *fmt, ...); diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 9a7c62bd5e431..cfaf2f9d67b22 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1399,7 +1399,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb) should_inject_overflow(ring)) { /* This TX ring is full. */ unsigned int skb_mapping = skb_get_queue_mapping(skb); - ieee80211_stop_queue(dev->wl->hw, skb_mapping); + b43_stop_queue(dev, skb_mapping); dev->wl->tx_queue_stopped[skb_mapping] = true; ring->stopped = true; if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { @@ -1570,7 +1570,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, } else { /* If the driver queue is running wake the corresponding * mac80211 queue. */ - ieee80211_wake_queue(dev->wl->hw, ring->queue_prio); + b43_wake_queue(dev, ring->queue_prio); if (b43_debug(dev, B43_DBG_DMAVERBOSE)) { b43dbg(dev->wl, "Woke up TX ring %d\n", ring->index); } -- GitLab From b6b6bdfca8bc0525e4b390f2a4aa8ac671efd9d3 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:45 +0000 Subject: [PATCH 0827/2290] wifi: b43: Stop/wake correct queue in PIO Tx path when QoS is disabled [ Upstream commit 77135a38f6c2f950d2306ac3d37cbb407e6243f2 ] When QoS is disabled, the queue priority value will not map to the correct ieee80211 queue since there is only one queue. Stop/wake queue 0 when QoS is disabled to prevent trying to stop/wake a non-existent queue and failing to stop/wake the actual queue instantiated. Fixes: 5100d5ac81b9 ("b43: Add PIO support for PCMCIA devices") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-3-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/pio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/pio.c b/drivers/net/wireless/broadcom/b43/pio.c index 8c28a9250cd19..cc19b589fa70d 100644 --- a/drivers/net/wireless/broadcom/b43/pio.c +++ b/drivers/net/wireless/broadcom/b43/pio.c @@ -525,7 +525,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb) if (total_len > (q->buffer_size - q->buffer_used)) { /* Not enough memory on the queue. */ err = -EBUSY; - ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); + b43_stop_queue(dev, skb_get_queue_mapping(skb)); q->stopped = true; goto out; } @@ -552,7 +552,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb) if (((q->buffer_size - q->buffer_used) < roundup(2 + 2 + 6, 4)) || (q->free_packet_slots == 0)) { /* The queue is full. */ - ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb)); + b43_stop_queue(dev, skb_get_queue_mapping(skb)); q->stopped = true; } @@ -587,7 +587,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev, list_add(&pack->list, &q->packets_list); if (q->stopped) { - ieee80211_wake_queue(dev->wl->hw, q->queue_prio); + b43_wake_queue(dev, q->queue_prio); q->stopped = false; } } -- GitLab From 3033583a786ef2ada0e351d0940adef1193fba17 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:51 +0000 Subject: [PATCH 0828/2290] wifi: b43: Stop correct queue in DMA worker when QoS is disabled [ Upstream commit 581c8967d66c4961076dbbee356834e9c6777184 ] When QoS is disabled, the queue priority value will not map to the correct ieee80211 queue since there is only one queue. Stop queue 0 when QoS is disabled to prevent trying to stop a non-existent queue and failing to stop the actual queue instantiated. Fixes: bad691946966 ("b43: avoid packet losses in the dma worker code.") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-4-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index b2539a916fd04..c75f294ca1bc9 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -3603,7 +3603,7 @@ static void b43_tx_work(struct work_struct *work) err = b43_dma_tx(dev, skb); if (err == -ENOSPC) { wl->tx_queue_stopped[queue_num] = true; - ieee80211_stop_queue(wl->hw, queue_num); + b43_stop_queue(dev, queue_num); skb_queue_head(&wl->tx_queue[queue_num], skb); break; } @@ -3627,6 +3627,7 @@ static void b43_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb) { struct b43_wl *wl = hw_to_b43_wl(hw); + u16 skb_queue_mapping; if (unlikely(skb->len < 2 + 2 + 6)) { /* Too short, this can't be a valid frame. */ @@ -3635,12 +3636,12 @@ static void b43_op_tx(struct ieee80211_hw *hw, } B43_WARN_ON(skb_shinfo(skb)->nr_frags); - skb_queue_tail(&wl->tx_queue[skb->queue_mapping], skb); - if (!wl->tx_queue_stopped[skb->queue_mapping]) { + skb_queue_mapping = skb_get_queue_mapping(skb); + skb_queue_tail(&wl->tx_queue[skb_queue_mapping], skb); + if (!wl->tx_queue_stopped[skb_queue_mapping]) ieee80211_queue_work(wl->hw, &wl->tx_work); - } else { - ieee80211_stop_queue(wl->hw, skb->queue_mapping); - } + else + b43_stop_queue(wl->current_dev, skb_queue_mapping); } static void b43_qos_params_upload(struct b43_wldev *dev, -- GitLab From b8dd353a1a914721558d42d5bfaf5d6a30adafa1 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 31 Dec 2023 05:03:58 +0000 Subject: [PATCH 0829/2290] wifi: b43: Disable QoS for bcm4331 [ Upstream commit 09795bded2e725443fe4a4803cae2079cdaf7b26 ] bcm4331 seems to not function correctly with QoS support. This may be due to issues with currently available firmware or potentially a device specific issue. When queues that are not of the default "best effort" priority are selected, traffic appears to not transmit out of the hardware while no errors are returned. This behavior is present among all the other priority queues: video, voice, and background. While this can be worked around by setting a kernel parameter, the default behavior is problematic for most users and may be difficult to debug. This patch offers a working out-of-box experience for bcm4331 users. Log of the issue (using ssh low-priority traffic as an example): ssh -T -vvvv git@github.com OpenSSH_9.6p1, OpenSSL 3.0.12 24 Oct 2023 debug1: Reading configuration data /etc/ssh/ssh_config debug2: checking match for 'host * exec "/nix/store/q1c2flcykgr4wwg5a6h450hxbk4ch589-bash-5.2-p15/bin/bash -c '/nix/store/c015armnkhr6v18za0rypm7sh1i8js8w-gnupg-2.4.1/bin/gpg-connect-agent --quiet updatestartuptty /bye >/dev/null 2>&1'"' host github.com originally github.com debug3: /etc/ssh/ssh_config line 5: matched 'host "github.com"' debug1: Executing command: '/nix/store/q1c2flcykgr4wwg5a6h450hxbk4ch589-bash-5.2-p15/bin/bash -c '/nix/store/c015armnkhr6v18za0rypm7sh1i8js8w-gnupg-2.4.1/bin/gpg-connect-agent --quiet updatestartuptty /bye >/dev/null 2>&1'' debug3: command returned status 0 debug3: /etc/ssh/ssh_config line 5: matched 'exec "/nix/store/q1c2flcykgr4wwg5a6h450hxbk4ch589-bash-5.2-p15/bin/bash -c '/nix/store/c015armnkhr6v18za0r"' debug2: match found debug1: /etc/ssh/ssh_config line 9: Applying options for * debug3: expanded UserKnownHostsFile '~/.ssh/known_hosts' -> '/home/binary-eater/.ssh/known_hosts' debug3: expanded UserKnownHostsFile '~/.ssh/known_hosts2' -> '/home/binary-eater/.ssh/known_hosts2' debug2: resolving "github.com" port 22 debug3: resolve_host: lookup github.com:22 debug3: channel_clear_timeouts: clearing debug3: ssh_connect_direct: entering debug1: Connecting to github.com [192.30.255.113] port 22. debug3: set_sock_tos: set socket 3 IP_TOS 0x48 Fixes: e6f5b934fba8 ("b43: Add QOS support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://msgid.link/20231231050300.122806-5-sergeantsagara@protonmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index c75f294ca1bc9..bdfa68cc7ee2a 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -2587,7 +2587,8 @@ static void b43_request_firmware(struct work_struct *work) start_ieee80211: wl->hw->queues = B43_QOS_QUEUE_NUM; - if (!modparam_qos || dev->fw.opensource) + if (!modparam_qos || dev->fw.opensource || + dev->dev->chip_id == BCMA_CHIP_ID_BCM4331) wl->hw->queues = 1; err = ieee80211_register_hw(wl->hw); -- GitLab From d8766257c2e96df971e6a7c3870f2229dd75a40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Fri, 5 Jan 2024 08:57:32 +0100 Subject: [PATCH 0830/2290] wifi: wilc1000: fix declarations ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 535733e90e5d8912ebeccebb05b354a2d06ff459 ] Reorder parameters declaration in wilc_parse_join_bss_param to enforce reverse christmas tree Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240105075733.36331-2-alexis.lothore@bootlin.com Stable-dep-of: 205c50306acf ("wifi: wilc1000: fix RCU usage in connect path") Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/hif.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index a1b75feec6edf..00ecf14afab01 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -374,13 +374,13 @@ out: void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct cfg80211_crypto_settings *crypto) { - struct wilc_join_bss_param *param; - struct ieee80211_p2p_noa_attr noa_attr; - u8 rates_len = 0; + const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); const u8 *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; const u8 *ht_ie, *wpa_ie, *wmm_ie, *rsn_ie; + struct ieee80211_p2p_noa_attr noa_attr; + struct wilc_join_bss_param *param; + u8 rates_len = 0; int ret; - const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) -- GitLab From 745003b5917b610352f52fe0d11ef658d6471ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Fri, 5 Jan 2024 08:57:33 +0100 Subject: [PATCH 0831/2290] wifi: wilc1000: fix RCU usage in connect path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 205c50306acf58a335eb19fa84e40140f4fe814f ] With lockdep enabled, calls to the connect function from cfg802.11 layer lead to the following warning: ============================= WARNING: suspicious RCU usage 6.7.0-rc1-wt+ #333 Not tainted ----------------------------- drivers/net/wireless/microchip/wilc1000/hif.c:386 suspicious rcu_dereference_check() usage! [...] stack backtrace: CPU: 0 PID: 100 Comm: wpa_supplicant Not tainted 6.7.0-rc1-wt+ #333 Hardware name: Atmel SAMA5 unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x34/0x48 dump_stack_lvl from wilc_parse_join_bss_param+0x7dc/0x7f4 wilc_parse_join_bss_param from connect+0x2c4/0x648 connect from cfg80211_connect+0x30c/0xb74 cfg80211_connect from nl80211_connect+0x860/0xa94 nl80211_connect from genl_rcv_msg+0x3fc/0x59c genl_rcv_msg from netlink_rcv_skb+0xd0/0x1f8 netlink_rcv_skb from genl_rcv+0x2c/0x3c genl_rcv from netlink_unicast+0x3b0/0x550 netlink_unicast from netlink_sendmsg+0x368/0x688 netlink_sendmsg from ____sys_sendmsg+0x190/0x430 ____sys_sendmsg from ___sys_sendmsg+0x110/0x158 ___sys_sendmsg from sys_sendmsg+0xe8/0x150 sys_sendmsg from ret_fast_syscall+0x0/0x1c This warning is emitted because in the connect path, when trying to parse target BSS parameters, we dereference a RCU pointer whithout being in RCU critical section. Fix RCU dereference usage by moving it to a RCU read critical section. To avoid wrapping the whole wilc_parse_join_bss_param under the critical section, just use the critical section to copy ies data Fixes: c460495ee072 ("staging: wilc1000: fix incorrent type in initializer") Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240105075733.36331-3-alexis.lothore@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/hif.c | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index 00ecf14afab01..5eb02902e875a 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -374,38 +374,49 @@ out: void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct cfg80211_crypto_settings *crypto) { - const struct cfg80211_bss_ies *ies = rcu_dereference(bss->ies); - const u8 *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; + const u8 *ies_data, *tim_elm, *ssid_elm, *rates_ie, *supp_rates_ie; const u8 *ht_ie, *wpa_ie, *wmm_ie, *rsn_ie; struct ieee80211_p2p_noa_attr noa_attr; + const struct cfg80211_bss_ies *ies; struct wilc_join_bss_param *param; - u8 rates_len = 0; + u8 rates_len = 0, ies_len; int ret; param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) return NULL; + rcu_read_lock(); + ies = rcu_dereference(bss->ies); + ies_data = kmemdup(ies->data, ies->len, GFP_ATOMIC); + if (!ies_data) { + rcu_read_unlock(); + kfree(param); + return NULL; + } + ies_len = ies->len; + rcu_read_unlock(); + param->beacon_period = cpu_to_le16(bss->beacon_interval); param->cap_info = cpu_to_le16(bss->capability); param->bss_type = WILC_FW_BSS_TYPE_INFRA; param->ch = ieee80211_frequency_to_channel(bss->channel->center_freq); ether_addr_copy(param->bssid, bss->bssid); - ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + ssid_elm = cfg80211_find_ie(WLAN_EID_SSID, ies_data, ies_len); if (ssid_elm) { if (ssid_elm[1] <= IEEE80211_MAX_SSID_LEN) memcpy(param->ssid, ssid_elm + 2, ssid_elm[1]); } - tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len); + tim_elm = cfg80211_find_ie(WLAN_EID_TIM, ies_data, ies_len); if (tim_elm && tim_elm[1] >= 2) param->dtim_period = tim_elm[3]; memset(param->p_suites, 0xFF, 3); memset(param->akm_suites, 0xFF, 3); - rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies->data, ies->len); + rates_ie = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies_data, ies_len); if (rates_ie) { rates_len = rates_ie[1]; if (rates_len > WILC_MAX_RATES_SUPPORTED) @@ -416,7 +427,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, if (rates_len < WILC_MAX_RATES_SUPPORTED) { supp_rates_ie = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, - ies->data, ies->len); + ies_data, ies_len); if (supp_rates_ie) { u8 ext_rates = supp_rates_ie[1]; @@ -431,11 +442,11 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, } } - ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies->data, ies->len); + ht_ie = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies_data, ies_len); if (ht_ie) param->ht_capable = true; - ret = cfg80211_get_p2p_attr(ies->data, ies->len, + ret = cfg80211_get_p2p_attr(ies_data, ies_len, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, (u8 *)&noa_attr, sizeof(noa_attr)); if (ret > 0) { @@ -459,7 +470,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, } wmm_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WMM, - ies->data, ies->len); + ies_data, ies_len); if (wmm_ie) { struct ieee80211_wmm_param_ie *ie; @@ -474,13 +485,13 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, wpa_ie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WPA, - ies->data, ies->len); + ies_data, ies_len); if (wpa_ie) { param->mode_802_11i = 1; param->rsn_found = true; } - rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); + rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies_data, ies_len); if (rsn_ie) { int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; @@ -514,6 +525,7 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, param->akm_suites[i] = crypto->akm_suites[i] & 0xFF; } + kfree(ies_data); return (void *)param; } -- GitLab From 3518cea837de4d106efa84ddac18a07b6de1384e Mon Sep 17 00:00:00 2001 From: Martin Kaistra Date: Thu, 11 Jan 2024 17:36:27 +0100 Subject: [PATCH 0832/2290] wifi: rtl8xxxu: add cancel_work_sync() for c2hcmd_work [ Upstream commit 1213acb478a7181cd73eeaf00db430f1e45b1361 ] The workqueue might still be running, when the driver is stopped. To avoid a use-after-free, call cancel_work_sync() in rtl8xxxu_stop(). Fixes: e542e66b7c2e ("rtl8xxxu: add bluetooth co-existence support for single antenna") Signed-off-by: Martin Kaistra Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/20240111163628.320697-2-martin.kaistra@linutronix.de Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 6dd5ec1e4d8c3..ccac47dd781d6 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6542,6 +6542,7 @@ static void rtl8xxxu_stop(struct ieee80211_hw *hw) if (priv->usb_interrupts) rtl8xxxu_write32(priv, REG_USB_HIMR, 0); + cancel_work_sync(&priv->c2hcmd_work); cancel_delayed_work_sync(&priv->ra_watchdog); rtl8xxxu_free_rx_resources(priv); -- GitLab From 515cc676dfbce40d93c92b1ff3c1070e917f4e52 Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Mon, 15 Jan 2024 15:56:32 +0100 Subject: [PATCH 0833/2290] wifi: wilc1000: do not realloc workqueue everytime an interface is added MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 328efda22af81130c2ad981c110518cb29ff2f1d ] Commit 09ed8bfc5215 ("wilc1000: Rename workqueue from "WILC_wq" to "NETDEV-wq"") moved workqueue creation in wilc_netdev_ifc_init in order to set the interface name in the workqueue name. However, while the driver needs only one workqueue, the wilc_netdev_ifc_init is called each time we add an interface over a phy, which in turns overwrite the workqueue with a new one. This can be observed with the following commands: for i in $(seq 0 10) do iw phy phy0 interface add wlan1 type managed iw dev wlan1 del done ps -eo pid,comm|grep wlan 39 kworker/R-wlan0 98 kworker/R-wlan1 102 kworker/R-wlan1 105 kworker/R-wlan1 108 kworker/R-wlan1 111 kworker/R-wlan1 114 kworker/R-wlan1 117 kworker/R-wlan1 120 kworker/R-wlan1 123 kworker/R-wlan1 126 kworker/R-wlan1 129 kworker/R-wlan1 Fix this leakage by putting back hif_workqueue allocation in wilc_cfg80211_init. Regarding the workqueue name, it is indeed relevant to set it lowercase, however it is not attached to a specific netdev, so enforcing netdev name in the name is not so relevant. Still, enrich the name with the wiphy name to make it clear which phy is using the workqueue. Fixes: 09ed8bfc5215 ("wilc1000: Rename workqueue from "WILC_wq" to "NETDEV-wq"") Signed-off-by: Ajay Singh Co-developed-by: Alexis Lothoré Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240115-wilc_1000_fixes-v1-3-54d29463a738@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 11 ++++++++++- drivers/net/wireless/microchip/wilc1000/netdev.c | 10 +--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b545d93c6e374..2f75dc4b47975 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1810,15 +1810,24 @@ int wilc_cfg80211_init(struct wilc **wilc, struct device *dev, int io_type, INIT_LIST_HEAD(&wl->rxq_head.list); INIT_LIST_HEAD(&wl->vif_list); + wl->hif_workqueue = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, + wiphy_name(wl->wiphy)); + if (!wl->hif_workqueue) { + ret = -ENOMEM; + goto free_cfg; + } vif = wilc_netdev_ifc_init(wl, "wlan%d", WILC_STATION_MODE, NL80211_IFTYPE_STATION, false); if (IS_ERR(vif)) { ret = PTR_ERR(vif); - goto free_cfg; + goto free_hq; } return 0; +free_hq: + destroy_workqueue(wl->hif_workqueue); + free_cfg: wilc_wlan_cfg_deinit(wl); diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index e9f59de31b0b9..0e6eeeed2e086 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -977,13 +977,6 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, goto error; } - wl->hif_workqueue = alloc_ordered_workqueue("%s-wq", WQ_MEM_RECLAIM, - ndev->name); - if (!wl->hif_workqueue) { - ret = -ENOMEM; - goto unregister_netdev; - } - ndev->needs_free_netdev = true; vif->iftype = vif_type; vif->idx = wilc_get_available_idx(wl); @@ -996,12 +989,11 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, return vif; -unregister_netdev: +error: if (rtnl_locked) cfg80211_unregister_netdevice(ndev); else unregister_netdev(ndev); - error: free_netdev(ndev); return ERR_PTR(ret); } -- GitLab From 5a26e6d2a7ec0c672d21b9c4a1c914c1b70d6d0f Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Mon, 15 Jan 2024 15:56:34 +0100 Subject: [PATCH 0834/2290] wifi: wilc1000: fix multi-vif management when deleting a vif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 12cfc9c8d3faf887a202c89bc312202445fca7e8 ] Adding then removing a second vif currently makes the first vif not working anymore. This is visible for example when we have a first interface connected to some access point: - create a wpa_supplicant.conf with some AP credentials - wpa_supplicant -Dnl80211 -c /etc/wpa_supplicant.conf -i wlan0 - dhclient wlan0 - iw phy phy0 interface add wlan1 type managed - iw dev wlan1 del wlan0 does not manage properly traffic anymore (eg: ping not working) This is due to vif mode being incorrectly reconfigured with some default values in del_virtual_intf, affecting by default first vif. Prevent first vif from being affected on second vif removal by removing vif mode change command in del_virtual_intf Fixes: 9bc061e88054 ("staging: wilc1000: added support to dynamically add/remove interfaces") Signed-off-by: Ajay Singh Co-developed-by: Alexis Lothoré Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240115-wilc_1000_fixes-v1-5-54d29463a738@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 2f75dc4b47975..6f3245a43aef1 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1615,7 +1615,6 @@ static int del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) cfg80211_unregister_netdevice(vif->ndev); vif->monitor_flag = 0; - wilc_set_operation_mode(vif, 0, 0, 0); mutex_lock(&wl->vif_mutex); list_del_rcu(&vif->list); wl->vif_num--; -- GitLab From 6798cf0aaa7c73cee7fd3d13fc7110617080a0a6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Sun, 3 Sep 2023 11:02:15 +0800 Subject: [PATCH 0835/2290] wifi: mwifiex: debugfs: Drop unnecessary error check for debugfs_create_dir() [ Upstream commit 50180c7f8e3de7c2d87f619131776598fcb1478d ] debugfs_create_dir() returns ERR_PTR and never return NULL. As Russell suggested, this patch removes the error checking for debugfs_create_dir(). This is because the DebugFS kernel API is developed in a way that the caller can safely ignore the errors that occur during the creation of DebugFS nodes. The debugfs APIs have a IS_ERR() judge in start_creating() which can handle it gracefully. So these checks are unnecessary. Fixes: 5e6e3a92b9a4 ("wireless: mwifiex: initial commit for Marvell mwifiex driver") Signed-off-by: Jinjie Ruan Suggested-by: Russell King (Oracle) Signed-off-by: Kalle Valo Link: https://msgid.link/20230903030216.1509013-3-ruanjinjie@huawei.com Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/debugfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index 63f232c723374..55ca5b287fe7f 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -964,9 +964,6 @@ mwifiex_dev_debugfs_init(struct mwifiex_private *priv) priv->dfs_dev_dir = debugfs_create_dir(priv->netdev->name, mwifiex_dfs_dir); - if (!priv->dfs_dev_dir) - return; - MWIFIEX_DFS_ADD_FILE(info); MWIFIEX_DFS_ADD_FILE(debug); MWIFIEX_DFS_ADD_FILE(getlog); -- GitLab From a81edfc5ec8b0b07e790265e4a9aee2135a2c0bf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 15 Jan 2024 12:03:03 +0100 Subject: [PATCH 0836/2290] ARM: dts: renesas: r8a73a4: Fix external clocks and clock rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 090c4094574705b0afc7d37825cdc5d06f0e7e02 ] External clocks should be defined as zero-Hz clocks in the SoC .dtsi, and overridden in the board .dts when present. Correct the clock rate of extal1 from 25 to 26 MHz, to match the crystal oscillator present on the APE6-EVM board. Fixes: a76809a329d6ebae ("ARM: shmobile: r8a73a4: Common clock framework DT description") Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Link: https://lore.kernel.org/r/1692bc8cd465d62168cbf110522ad62a7af3f606.1705315614.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm/boot/dts/r8a73a4-ape6evm.dts | 12 ++++++++++++ arch/arm/boot/dts/r8a73a4.dtsi | 9 ++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/r8a73a4-ape6evm.dts index e81a7213d3047..4282bafbb5043 100644 --- a/arch/arm/boot/dts/r8a73a4-ape6evm.dts +++ b/arch/arm/boot/dts/r8a73a4-ape6evm.dts @@ -209,6 +209,18 @@ status = "okay"; }; +&extal1_clk { + clock-frequency = <26000000>; +}; + +&extal2_clk { + clock-frequency = <48000000>; +}; + +&extalr_clk { + clock-frequency = <32768>; +}; + &pfc { scifa0_pins: scifa0 { groups = "scifa0_data"; diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index c39066967053f..d1f4cbd099efb 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -450,17 +450,20 @@ extalr_clk: extalr { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <32768>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; extal1_clk: extal1 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <25000000>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; extal2_clk: extal2 { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <48000000>; + /* This value must be overridden by the board. */ + clock-frequency = <0>; }; fsiack_clk: fsiack { compatible = "fixed-clock"; -- GitLab From e72160cb6e23b78b41999d6885a34ce8db536095 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Wed, 17 Jan 2024 10:12:20 +0300 Subject: [PATCH 0837/2290] cpufreq: brcmstb-avs-cpufreq: add check for cpufreq_cpu_get's return value [ Upstream commit f661017e6d326ee187db24194cabb013d81bc2a6 ] cpufreq_cpu_get may return NULL. To avoid NULL-dereference check it and return 0 in case of error. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Signed-off-by: Anastasia Belova Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index f644c5e325fb2..38ec0fedb247f 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,6 +481,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (!policy) + return 0; struct private_data *priv = policy->driver_data; cpufreq_cpu_put(policy); -- GitLab From b98ed6417e5958b8bae37798a0e0663e5b4317d2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:44:13 -0600 Subject: [PATCH 0838/2290] cpufreq: Explicitly include correct DT includes [ Upstream commit a70eb93a2477371638ef481aaae7bb7b760d3004 ] The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it as merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar Stable-dep-of: 788715b5f21c ("cpufreq: mediatek-hw: Wait for CPU supplies before probing") Signed-off-by: Sasha Levin --- drivers/cpufreq/armada-37xx-cpufreq.c | 4 +--- drivers/cpufreq/mediatek-cpufreq-hw.c | 3 ++- drivers/cpufreq/ppc_cbe_cpufreq.c | 2 +- drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 1 - drivers/cpufreq/qcom-cpufreq-nvmem.c | 1 - drivers/cpufreq/scpi-cpufreq.c | 2 +- drivers/cpufreq/sti-cpufreq.c | 2 +- drivers/cpufreq/ti-cpufreq.c | 2 +- drivers/cpufreq/vexpress-spc-cpufreq.c | 1 - 9 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index b74289a95a171..bea41ccabf1f0 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -14,10 +14,8 @@ #include #include #include +#include #include -#include -#include -#include #include #include #include diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f0e0a35c7f217..212bbca8daf32 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -10,8 +10,9 @@ #include #include #include -#include +#include #include +#include #include #define LUT_MAX_ENTRIES 32U diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index e3313ce63b388..88afc49941b71 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 4fba3637b115c..6f0c32592416d 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index a577586b23be2..cb03bfb0435ea 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index fd2c16821d54c..ac719aca49b75 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 1a63aeea87112..9c542e723a157 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index f64180dd2005b..61ef653bcf56f 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index d295f405c4bb0..865e501648034 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include -- GitLab From 60b5b89e41a68bccad14ea7cecc8fe494ce48830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 10 Jan 2024 11:23:02 -0300 Subject: [PATCH 0839/2290] cpufreq: mediatek-hw: Wait for CPU supplies before probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 788715b5f21c6455264fe00a1779e61bec407fe2 ] Before proceeding with the probe and enabling frequency scaling for the CPUs, make sure that all supplies feeding the CPUs have probed. This fixes an issue observed on MT8195-Tomato where if the mediatek-cpufreq-hw driver enabled the hardware (by writing to REG_FREQ_ENABLE) before the SPMI controller driver (spmi-mtk-pmif), behind which lies the big CPU supply, probed the platform would hang shortly after with "rcu: INFO: rcu_preempt detected stalls on CPUs/tasks" being printed in the log. Fixes: 4855e26bcf4d ("cpufreq: mediatek-hw: Add support for CPUFREQ HW") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/mediatek-cpufreq-hw.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 212bbca8daf32..42240a7d826da 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #define LUT_MAX_ENTRIES 32U @@ -296,7 +297,23 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) { const void *data; - int ret; + int ret, cpu; + struct device *cpu_dev; + struct regulator *cpu_reg; + + /* Make sure that all CPU supplies are available before proceeding. */ + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "Failed to get cpu%d device\n", cpu); + + cpu_reg = devm_regulator_get_optional(cpu_dev, "cpu"); + if (IS_ERR(cpu_reg)) + return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), + "CPU%d regulator get failed\n", cpu); + } + data = of_device_get_match_data(&pdev->dev); if (!data) -- GitLab From f7dbf2c1675ae53c8f9b1ff77741daa08060873b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:55 +0000 Subject: [PATCH 0840/2290] sock_diag: annotate data-races around sock_diag_handlers[family] [ Upstream commit efd402537673f9951992aea4ef0f5ff51d858f4b ] __sock_diag_cmd() and sock_diag_bind() read sock_diag_handlers[family] without a lock held. Use READ_ONCE()/WRITE_ONCE() annotations to avoid potential issues. Fixes: 8ef874bfc729 ("sock_diag: Move the sock_ code to net/core/") Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/sock_diag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index f7cf74cdd3db1..e6ea6764d10ab 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -190,7 +190,7 @@ int sock_diag_register(const struct sock_diag_handler *hndl) if (sock_diag_handlers[hndl->family]) err = -EBUSY; else - sock_diag_handlers[hndl->family] = hndl; + WRITE_ONCE(sock_diag_handlers[hndl->family], hndl); mutex_unlock(&sock_diag_table_mutex); return err; @@ -206,7 +206,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) mutex_lock(&sock_diag_table_mutex); BUG_ON(sock_diag_handlers[family] != hnld); - sock_diag_handlers[family] = NULL; + WRITE_ONCE(sock_diag_handlers[family], NULL); mutex_unlock(&sock_diag_table_mutex); } EXPORT_SYMBOL_GPL(sock_diag_unregister); @@ -224,7 +224,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX); - if (sock_diag_handlers[req->sdiag_family] == NULL) + if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL) sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); @@ -283,12 +283,12 @@ static int sock_diag_bind(struct net *net, int group) switch (group) { case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET]) + if (!READ_ONCE(sock_diag_handlers[AF_INET])) sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET6]) + if (!READ_ONCE(sock_diag_handlers[AF_INET6])) sock_load_diag_module(AF_INET6, 0); break; } -- GitLab From d10dbf722d32bbf3e01ec754604b396142ad38a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:56 +0000 Subject: [PATCH 0841/2290] inet_diag: annotate data-races around inet_diag_table[] [ Upstream commit e50e10ae5d81ddb41547114bfdc5edc04422f082 ] inet_diag_lock_handler() reads inet_diag_table[proto] locklessly. Use READ_ONCE()/WRITE_ONCE() annotations to avoid potential issues. Fixes: d523a328fb02 ("[INET]: Fix inet_diag dead-lock regression") Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/inet_diag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f7426926a1041..8f690a6e61baa 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto) return ERR_PTR(-ENOENT); } - if (!inet_diag_table[proto]) + if (!READ_ONCE(inet_diag_table[proto])) sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); @@ -1419,7 +1419,7 @@ int inet_diag_register(const struct inet_diag_handler *h) mutex_lock(&inet_diag_table_mutex); err = -EEXIST; if (!inet_diag_table[type]) { - inet_diag_table[type] = h; + WRITE_ONCE(inet_diag_table[type], h); err = 0; } mutex_unlock(&inet_diag_table_mutex); @@ -1436,7 +1436,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) return; mutex_lock(&inet_diag_table_mutex); - inet_diag_table[type] = NULL; + WRITE_ONCE(inet_diag_table[type], NULL); mutex_unlock(&inet_diag_table_mutex); } EXPORT_SYMBOL_GPL(inet_diag_unregister); -- GitLab From c1760abb10021dc140e064d74e3f49a48b9e5e40 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 16 Jan 2024 14:19:20 +0800 Subject: [PATCH 0842/2290] bpftool: Silence build warning about calloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f5f30386c78105cba520e443a6a9ee945ec1d066 ] There exists the following warning when building bpftool: CC prog.o prog.c: In function ‘profile_open_perf_events’: prog.c:2301:24: warning: ‘calloc’ sizes specified with ‘sizeof’ in the earlier argument and not in the later argument [-Wcalloc-transposed-args] 2301 | sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); | ^~~ prog.c:2301:24: note: earlier argument should specify number of elements, later size of each element Tested with the latest upstream GCC which contains a new warning option -Wcalloc-transposed-args. The first argument to calloc is documented to be number of elements in array, while the second argument is size of each element, just switch the first and second arguments of calloc() to silence the build warning, compile tested only. Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command") Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240116061920.31172-1-yangtiezhu@loongson.cn Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 41c02b6f6f043..7e0b846e17eef 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2200,7 +2200,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj) int map_fd; profile_perf_events = calloc( - sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int)); if (!profile_perf_events) { p_err("failed to allocate memory for perf_event array: %s", strerror(errno)); -- GitLab From bb21851465b3f1b9323217c4517f7aa37aaa4fd1 Mon Sep 17 00:00:00 2001 From: Andrey Grafin Date: Wed, 17 Jan 2024 16:06:18 +0300 Subject: [PATCH 0843/2290] libbpf: Apply map_set_def_max_entries() for inner_maps on creation [ Upstream commit f04deb90e516e8e48bf8693397529bc942a9e80b ] This patch allows to auto create BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS with values of BPF_MAP_TYPE_PERF_EVENT_ARRAY by bpf_object__load(). Previous behaviour created a zero filled btf_map_def for inner maps and tried to use it for a map creation but the linux kernel forbids to create a BPF_MAP_TYPE_PERF_EVENT_ARRAY map with max_entries=0. Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Signed-off-by: Andrey Grafin Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240117130619.9403-1-conquistador@yandex-team.ru Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2014b1250ea2..c71d4d0f5c6f3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -70,6 +70,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); static const char * const attach_type_name[] = { [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", @@ -4992,6 +4993,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", -- GitLab From 6ae1ec0043d5ab85995770c52fb81b21f2be4712 Mon Sep 17 00:00:00 2001 From: Andrey Grafin Date: Wed, 17 Jan 2024 16:06:19 +0300 Subject: [PATCH 0844/2290] selftest/bpf: Add map_in_maps with BPF_MAP_TYPE_PERF_EVENT_ARRAY values [ Upstream commit 40628f9fff73adecac77a9aa390f8016724cad99 ] Check that bpf_object__load() successfully creates map_in_maps with BPF_MAP_TYPE_PERF_EVENT_ARRAY values. These changes cover fix in the previous patch "libbpf: Apply map_set_def_max_entries() for inner_maps on creation". A command line output is: - w/o fix $ sudo ./test_maps libbpf: map 'mim_array_pe': failed to create inner map: -22 libbpf: map 'mim_array_pe': failed to create: Invalid argument(-22) libbpf: failed to load object './test_map_in_map.bpf.o' Failed to load test prog - with fix $ sudo ./test_maps ... test_maps: OK, 0 SKIPPED Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Signed-off-by: Andrey Grafin Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240117130619.9403-2-conquistador@yandex-team.ru Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- .../selftests/bpf/progs/test_map_in_map.c | 26 +++++++++++++++++++ tools/testing/selftests/bpf/test_maps.c | 6 ++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index f416032ba858b..b295f9b721bf8 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -21,6 +21,32 @@ struct { __type(value, __u32); } mim_hash SEC(".maps"); +/* The following three maps are used to test + * perf_event_array map can be an inner + * map of hash/array_of_maps. + */ +struct perf_event_array { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} inner_map0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_array_pe SEC(".maps") = { + .values = {&inner_map0}}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_hash_pe SEC(".maps") = { + .values = {&inner_map0}}; + SEC("xdp") int xdp_mimtest0(struct xdp_md *ctx) { diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index b73152822aa28..81cd48cc80c23 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1190,7 +1190,11 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__load(obj); + err = bpf_object__load(obj); + if (err) { + printf("Failed to load test prog\n"); + goto out_map_in_map; + } map = bpf_object__find_map_by_name(obj, "mim_array"); if (!map) { -- GitLab From 7ab56f24bf01ae3ffaac88fe1f41744f6a25a0b8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:52 -0800 Subject: [PATCH 0845/2290] af_unix: Annotate data-race of gc_in_progress in wait_for_unix_gc(). [ Upstream commit 31e03207119a535d0b0e3b3a7f91983aeb2cb14d ] gc_in_progress is changed under spin_lock(&unix_gc_lock), but wait_for_unix_gc() reads it locklessly. Let's use READ_ONCE(). Fixes: 5f23b734963e ("net: Fix soft lockups/OOM issues w/ unix garbage collector") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240123170856.41348-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/garbage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ab2c83d58b62a..9bfffe2a7f020 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -198,7 +198,7 @@ void wait_for_unix_gc(void) if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && !READ_ONCE(gc_in_progress)) unix_gc(); - wait_event(unix_gc_wait, gc_in_progress == false); + wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress)); } /* The external entry point: unix_gc() */ -- GitLab From 1b3bae282ae892df62e6b2ff7fceed43f4b16ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 24 Jan 2024 17:31:43 -0300 Subject: [PATCH 0846/2290] cpufreq: mediatek-hw: Don't error out if supply is not found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eaffb10b51bf74415c9252fd8fb4dd77122501ee ] devm_regulator_get_optional() returns -ENODEV if no supply can be found. By introducing its usage, commit 788715b5f21c ("cpufreq: mediatek-hw: Wait for CPU supplies before probing") caused the driver to fail probe if no supply was present in any of the CPU DT nodes. Use devm_regulator_get() instead since the CPUs do require supplies even if not described in the DT. It will gracefully return a dummy regulator if none is found in the DT node, allowing probe to succeed. Fixes: 788715b5f21c ("cpufreq: mediatek-hw: Wait for CPU supplies before probing") Reported-by: kernelci.org bot Closes: https://linux.kernelci.org/test/case/id/65b0b169710edea22852a3fa/ Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 42240a7d826da..7f326bb5fd8de 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -308,7 +308,7 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "Failed to get cpu%d device\n", cpu); - cpu_reg = devm_regulator_get_optional(cpu_dev, "cpu"); + cpu_reg = devm_regulator_get(cpu_dev, "cpu"); if (IS_ERR(cpu_reg)) return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), "CPU%d regulator get failed\n", cpu); -- GitLab From 3248f4ae054c787a72905ed42ef736bad0d5140e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 26 Jan 2024 14:09:44 -0800 Subject: [PATCH 0847/2290] libbpf: Fix faccessat() usage on Android [ Upstream commit ad57654053805bf9a62602aaec74cc78edb6f235 ] Android implementation of libc errors out with -EINVAL in faccessat() if passed AT_EACCESS ([0]), this leads to ridiculous issue with libbpf refusing to load /sys/kernel/btf/vmlinux on Androids ([1]). Fix by detecting Android and redefining AT_EACCESS to 0, it's equivalent on Android. [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 Fixes: 6a4ab8869d0b ("libbpf: Fix the case of running as non-root with capabilities") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240126220944.2497665-1-andrii@kernel.org Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf_internal.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 377642ff51fce..8669f6e0f6e2f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -17,6 +17,20 @@ #include #include "relo_core.h" +/* Android's libc doesn't support AT_EACCESS in faccessat() implementation + * ([0]), and just returns -EINVAL even if file exists and is accessible. + * See [1] for issues caused by this. + * + * So just redefine it to 0 on Android. + * + * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 + * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 + */ +#ifdef __ANDROID__ +#undef AT_EACCESS +#define AT_EACCESS 0 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -- GitLab From 4c820998a5a481e8cc99da48068c94d361391612 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 25 Jan 2024 13:05:10 -0800 Subject: [PATCH 0848/2290] pmdomain: qcom: rpmhpd: Drop SA8540P gfx.lvl [ Upstream commit 883957bee580b723fd87d49ac73e0c84fc03a446 ] On SA8295P and SA8540P gfx.lvl is not provdied by rpmh, but rather is handled by an external regulator (max20411). Drop gfx.lvl from the list of power-domains exposed on this platform. Fixes: f68f1cb3437d ("soc: qcom: rpmhpd: add sc8280xp & sa8540p rpmh power-domains") Reviewed-by: Dmitry Baryshkov Acked-by: Ulf Hansson Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240125-sa8295p-gpu-v4-4-7011c2a63037@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/soc/qcom/rpmhpd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 9a90f241bb97f..6efe36aeb48e9 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -195,7 +195,6 @@ static struct rpmhpd *sa8540p_rpmhpds[] = { [SC8280XP_CX] = &cx, [SC8280XP_CX_AO] = &cx_ao, [SC8280XP_EBI] = &ebi, - [SC8280XP_GFX] = &gfx, [SC8280XP_LCX] = &lcx, [SC8280XP_LMX] = &lmx, [SC8280XP_MMCX] = &mmcx, -- GitLab From 8f38b401b4e05e768484b9bde356f46d4bb00247 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:48:58 +0100 Subject: [PATCH 0849/2290] arm64: dts: imx8mm-kontron: Disable pullups for I2C signals on OSM-S i.MX8MM [ Upstream commit 96293af54f6aa859015d8ca40a1437d3115ad50c ] There are external pullup resistors on the board and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts | 4 ++-- arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 8b16bd68576c0..0730c22e5b6b9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -294,8 +294,8 @@ pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3 - MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3 + MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 + MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi index 8d10f5b412978..9643d6ed9a7c7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi @@ -247,8 +247,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3 - MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 + MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083 + MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083 >; }; -- GitLab From 333a02b3be6a3bc42a349c3bea0b6c835226286f Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:48:59 +0100 Subject: [PATCH 0850/2290] arm64: dts: imx8mm-kontron: Disable pullups for I2C signals on SL/BL i.MX8MM [ Upstream commit f19e5bb91d53264d7dac5d845a4825afadf72440 ] There are external pullup resistors on the board and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts | 4 ++-- arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index a079322a37931..8d0527bb6fa59 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -277,8 +277,8 @@ pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x400001c3 - MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x400001c3 + MX8MM_IOMUXC_I2C4_SCL_I2C4_SCL 0x40000083 + MX8MM_IOMUXC_I2C4_SDA_I2C4_SDA 0x40000083 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi index 0679728d24899..884ae2ad35114 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-sl.dtsi @@ -237,8 +237,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x400001c3 - MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 + MX8MM_IOMUXC_I2C1_SCL_I2C1_SCL 0x40000083 + MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x40000083 >; }; -- GitLab From dde02bf5fc9bb9ac7b112527d49f98aa34f97918 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:00 +0100 Subject: [PATCH 0851/2290] arm64: dts: imx8mm-kontron: Disable pullups for onboard UART signals on BL OSM-S board [ Upstream commit c6d9b5672a0e2c4b1079a50d2fc8780c40cfd3eb ] These signals are actively driven by the SoC or by the onboard transceiver. There's no need to enable the internal pull resistors and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mm-kontron-bl-osm-s.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 0730c22e5b6b9..1dd03ef0a7835 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -313,19 +313,19 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140 - MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 + MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 + MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140 - MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140 - MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 + MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 + MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 >; }; -- GitLab From ed2186ea406fdb2263635ce37b210025aa9d6d77 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:01 +0100 Subject: [PATCH 0852/2290] arm64: dts: imx8mm-kontron: Disable pullups for onboard UART signals on BL board [ Upstream commit 162aadaa0df8217b0cc49d919dd00022fef65e78 ] These signals are actively driven by the SoC or by the onboard transceiver. There's no need to enable the internal pull resistors and due to silicon errata ERR050080 let's disable the internal ones to prevent any unwanted behavior in case they wear out. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../boot/dts/freescale/imx8mm-kontron-bl.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index 8d0527bb6fa59..bffa0ea4aa46a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -290,19 +290,19 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x140 - MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x0 + MX8MM_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x0 + MX8MM_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x0 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x140 - MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x140 - MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x140 - MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x140 + MX8MM_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x0 + MX8MM_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x0 + MX8MM_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x0 + MX8MM_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x0 >; }; -- GitLab From a4116bd6ee5e1c1b65a61ed9221657615a2f45bf Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:02 +0100 Subject: [PATCH 0853/2290] arm64: dts: imx8mm-kontron: Disable pull resistors for SD card signals on BL OSM-S board [ Upstream commit 5a940ba3e4d7c8710c9073ff5d0ca4644d4da9db ] Some signals have external pullup resistors on the board and don't need the internal ones to be enabled. Due to silicon errata ERR050080 let's disable the internal pull resistors whererever possible and prevent any unwanted behavior in case they wear out. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mm-kontron-bl-osm-s.dts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts index 1dd03ef0a7835..d9fa0deea7002 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl-osm-s.dts @@ -337,40 +337,40 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; }; -- GitLab From 795fb93bde7aa1f769e377ec28853eab0dec8181 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:03 +0100 Subject: [PATCH 0854/2290] arm64: dts: imx8mm-kontron: Disable pull resistors for SD card signals on BL board [ Upstream commit 008820524844326ffb3123cebceba1960c0ad0dc ] Some signals have external pullup resistors on the board and don't need the internal ones to be enabled. Due to silicon errata ERR050080 let's disable the internal pull resistors whererever possible and prevent any unwanted behavior in case they wear out. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../boot/dts/freescale/imx8mm-kontron-bl.dts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts index bffa0ea4aa46a..d54cddd65b526 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-bl.dts @@ -314,40 +314,40 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x190 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d0 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d0 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x194 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d4 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d4 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = < - MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x196 + MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96 MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x1d6 MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x1d6 MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 - MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 - MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 + MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x19 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0xd0 >; }; }; -- GitLab From 4f4f1e2deef2e6aaf98e10847a08b7fc25796834 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 8 Jan 2024 09:49:04 +0100 Subject: [PATCH 0855/2290] arm64: dts: imx8mm-kontron: Fix interrupt for RTC on OSM-S i.MX8MM module [ Upstream commit 8d0f39b7d04d864e89b84063b124fd10aa4b8809 ] The level of the interrupt signal is active low instead. Fix this. Fixes: de9618e84f76 ("arm64: dts: Add support for Kontron SL/BL i.MX8MM OSM-S") Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi index 9643d6ed9a7c7..d5199ecb3f6c1 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-osm-s.dtsi @@ -205,7 +205,7 @@ reg = <0x52>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc>; - interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_HIGH>; + interrupts-extended = <&gpio4 1 IRQ_TYPE_LEVEL_LOW>; trickle-diode-disable; }; }; -- GitLab From fed6a1df672eb0c281b17ba72ef78edcb724ce3a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Feb 2024 09:20:24 -0800 Subject: [PATCH 0856/2290] libbpf: Add missing LIBBPF_API annotation to libbpf_set_memlock_rlim API [ Upstream commit 93ee1eb85e28d1e35bb059c1f5965d65d5fc83c2 ] LIBBPF_API annotation seems missing on libbpf_set_memlock_rlim API, so add it to make this API callable from libbpf's shared library version. Fixes: e542f2c4cd16 ("libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF") Fixes: ab9a5a05dc48 ("libbpf: fix up few libbpf.map problems") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240201172027.604869-3-andrii@kernel.org Signed-off-by: Sasha Levin --- tools/lib/bpf/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index fddc05c667b5d..874fe362375de 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,7 +35,7 @@ extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); +LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ -- GitLab From 74d0639261dd795dce958d1b14815bdcbb48a715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jan 2024 15:02:17 +0100 Subject: [PATCH 0857/2290] wifi: ath9k: delay all of ath9k_wmi_event_tasklet() until init is complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 24355fcb0d4cbcb6ddda262596558e8cfba70f11 ] The ath9k_wmi_event_tasklet() used in ath9k_htc assumes that all the data structures have been fully initialised by the time it runs. However, because of the order in which things are initialised, this is not guaranteed to be the case, because the device is exposed to the USB subsystem before the ath9k driver initialisation is completed. We already committed a partial fix for this in commit: 8b3046abc99e ("ath9k_htc: fix NULL pointer dereference at ath9k_htc_tx_get_packet()") However, that commit only aborted the WMI_TXSTATUS_EVENTID command in the event tasklet, pairing it with an "initialisation complete" bit in the TX struct. It seems syzbot managed to trigger the race for one of the other commands as well, so let's just move the existing synchronisation bit to cover the whole tasklet (setting it at the end of ath9k_htc_probe_device() instead of inside ath9k_tx_init()). Link: https://lore.kernel.org/r/ed1d2c66-1193-4c81-9542-d514c29ba8b8.bugreport@ubisectech.com Fixes: 8b3046abc99e ("ath9k_htc: fix NULL pointer dereference at ath9k_htc_tx_get_packet()") Reported-by: Ubisectech Sirius Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://msgid.link/20240126140218.1033443-1-toke@toke.dk Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc.h | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 4 ++++ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 4 ---- drivers/net/wireless/ath/ath9k/wmi.c | 10 ++++++---- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 237f4ec2cffd7..6c33e898b3000 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -306,7 +306,6 @@ struct ath9k_htc_tx { DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM); struct timer_list cleanup_timer; spinlock_t tx_lock; - bool initialized; }; struct ath9k_htc_tx_ctl { @@ -515,6 +514,7 @@ struct ath9k_htc_priv { unsigned long ps_usecount; bool ps_enabled; bool ps_idle; + bool initialized; #ifdef CONFIG_MAC80211_LEDS enum led_brightness brightness; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 96a3185a96d75..b014185373f34 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -966,6 +966,10 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev, htc_handle->drv_priv = priv; + /* Allow ath9k_wmi_event_tasklet() to operate. */ + smp_wmb(); + priv->initialized = true; + return 0; err_init: diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index d6a3f001dacb9..2fdd27885f543 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -815,10 +815,6 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv) skb_queue_head_init(&priv->tx.data_vo_queue); skb_queue_head_init(&priv->tx.tx_failed); - /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */ - smp_wmb(); - priv->tx.initialized = true; - return 0; } diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 1476b42b52a91..805ad31edba2b 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -155,6 +155,12 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) } spin_unlock_irqrestore(&wmi->wmi_lock, flags); + /* Check if ath9k_htc_probe_device() completed. */ + if (!data_race(priv->initialized)) { + kfree_skb(skb); + continue; + } + hdr = (struct wmi_cmd_hdr *) skb->data; cmd_id = be16_to_cpu(hdr->command_id); wmi_event = skb_pull(skb, sizeof(struct wmi_cmd_hdr)); @@ -169,10 +175,6 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) &wmi->drv_priv->fatal_work); break; case WMI_TXSTATUS_EVENTID: - /* Check if ath9k_tx_init() completed. */ - if (!data_race(priv->tx.initialized)) - break; - spin_lock_bh(&priv->tx.tx_lock); if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) { spin_unlock_bh(&priv->tx.tx_lock); -- GitLab From 9cd961d993b3a52215b74b6c75059eff50078685 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Jan 2024 08:53:48 +0200 Subject: [PATCH 0858/2290] wifi: iwlwifi: mvm: report beacon protection failures [ Upstream commit 91380f768d7f6e3d003755defa792e9a00a1444a ] Andrei reports that we just silently drop beacons after we report the key counters, but never report to userspace, so wpa_supplicant cannot send the WNM action frame. Fix that. Fixes: b1fdc2505abc ("iwlwifi: mvm: advertise BIGTK client support if available") Reported-by: Andrei Otcheretianski Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240128084842.7d855442cdce.Iba90b26f893dc8c49bfb8be65373cd0a138af12c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index f268a31ce26d9..105f283b777d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -299,6 +299,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, u32 status, struct ieee80211_rx_status *stats) { + struct wireless_dev *wdev; struct iwl_mvm_sta *mvmsta; struct iwl_mvm_vif *mvmvif; u8 keyid; @@ -320,9 +321,15 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, if (!ieee80211_is_beacon(hdr->frame_control)) return 0; + if (!sta) + return -1; + + mvmsta = iwl_mvm_sta_from_mac80211(sta); + mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif); + /* key mismatch - will also report !MIC_OK but we shouldn't count it */ if (!(status & IWL_RX_MPDU_STATUS_KEY_VALID)) - return -1; + goto report; /* good cases */ if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK && @@ -331,13 +338,6 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, return 0; } - if (!sta) - return -1; - - mvmsta = iwl_mvm_sta_from_mac80211(sta); - - mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif); - /* * both keys will have the same cipher and MIC length, use * whichever one is available @@ -346,11 +346,11 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, if (!key) { key = rcu_dereference(mvmvif->bcn_prot.keys[1]); if (!key) - return -1; + goto report; } if (len < key->icv_len + IEEE80211_GMAC_PN_LEN + 2) - return -1; + goto report; /* get the real key ID */ keyid = frame[len - key->icv_len - IEEE80211_GMAC_PN_LEN - 2]; @@ -364,7 +364,7 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, return -1; key = rcu_dereference(mvmvif->bcn_prot.keys[keyid - 6]); if (!key) - return -1; + goto report; } /* Report status to mac80211 */ @@ -372,6 +372,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta, ieee80211_key_mic_failure(key); else if (status & IWL_RX_MPDU_STATUS_REPLAY_ERROR) ieee80211_key_replay(key); +report: + wdev = ieee80211_vif_to_wdev(mvmsta->vif); + if (wdev->netdev) + cfg80211_rx_unprot_mlme_mgmt(wdev->netdev, (void *)hdr, len); return -1; } -- GitLab From c855a1a5b7e3de57e6b1b29563113d5e3bfdb89a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Jan 2024 08:53:53 +0200 Subject: [PATCH 0859/2290] wifi: iwlwifi: dbg-tlv: ensure NUL termination [ Upstream commit ea1d166fae14e05d49ffb0ea9fcd4658f8d3dcea ] The iwl_fw_ini_debug_info_tlv is used as a string, so we must ensure the string is terminated correctly before using it. Fixes: a9248de42464 ("iwlwifi: dbg_ini: add TLV allocation new API support") Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240128084842.be15e858ee89.Ibff93429cf999eafc7b26f3eef4c055dc84984a0@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 5979d904bbbd2..677c9e0b46f10 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -103,6 +103,12 @@ static int iwl_dbg_tlv_alloc_debug_info(struct iwl_trans *trans, if (le32_to_cpu(tlv->length) != sizeof(*debug_info)) return -EINVAL; + /* we use this as a string, ensure input was NUL terminated */ + if (strnlen(debug_info->debug_cfg_name, + sizeof(debug_info->debug_cfg_name)) == + sizeof(debug_info->debug_cfg_name)) + return -EINVAL; + IWL_DEBUG_FW(trans, "WRT: Loading debug cfg: %s\n", debug_info->debug_cfg_name); -- GitLab From 5666fe7b8518460572f65769cc3c7ef6262e4ca2 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 29 Jan 2024 21:21:49 +0200 Subject: [PATCH 0860/2290] wifi: iwlwifi: fix EWRD table validity check [ Upstream commit c8d8f3911135921ace8e939ea0956b55f74bf8a0 ] EWRD ACPI table contains up to 3 additional sar profiles. According to the BIOS spec, the table contains a n_profile variable indicating how many additional profiles exist in the table. Currently we check that n_profiles is not <= 0. But according to the BIOS spec, 0 is a valid value, and it can't be < 0 anyway because we receive that from ACPI as an unsigned integer. Fixes: 39c1a9728f93 ("iwlwifi: refactor the SAR tables from mvm to acpi") Signed-off-by: Miri Korenblit Reviewed-by: Gregory Greenman Link: https://msgid.link/20240129211905.448ea2f40814.Iffd2aadf8e8693e6cb599bee0406a800a0c1e081@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index f5fcc547de391..235963e1d7a9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -725,7 +725,7 @@ read_table: * from index 1, so the maximum value allowed here is * ACPI_SAR_PROFILES_NUM - 1. */ - if (n_profiles <= 0 || n_profiles >= ACPI_SAR_PROFILE_NUM) { + if (n_profiles >= ACPI_SAR_PROFILE_NUM) { ret = -EINVAL; goto out_free; } -- GitLab From d80bac49aebf5b7011188290e94ba16343c6078b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Wed, 24 Jan 2024 21:58:57 +0100 Subject: [PATCH 0861/2290] gpio: vf610: allow disabling the vf610 driver [ Upstream commit f57595788244a838deec2d3be375291327cbc035 ] The vf610 gpio driver is enabled by default for all i.MX machines, without any option to disable it in a board-specific config file. Most i.MX chipsets have no hardware for this driver. Change the default to enable GPIO_VF610 for SOC_VF610 and disable it otherwise. Add a text description after the bool type, this makes the driver selectable by make config etc. Fixes: 30a35c07d9e9 ("gpio: vf610: drop the SOC_VF610 dependency for GPIO_VF610") Signed-off-by: Martin Kaiser Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3e8e5f4ffa59f..700f71c954956 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -679,7 +679,8 @@ config GPIO_UNIPHIER Say yes here to support UniPhier GPIOs. config GPIO_VF610 - def_bool y + bool "VF610 GPIO support" + default y if SOC_VF610 depends on ARCH_MXC select GPIOLIB_IRQCHIP help -- GitLab From a5fd802a1f5554037eec49a48a2eebc810d4a39b Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 20 Dec 2023 15:30:46 -0800 Subject: [PATCH 0862/2290] arm64: dts: imx8mm-venice-gw71xx: fix USB OTG VBUS [ Upstream commit ec2cb52fcfef5d58574f2cfbc9a99ffc20ae5a9d ] The GW71xx does not have a gpio controlled vbus regulator but it does require some pinctrl. Remove the regulator and move the valid pinctrl into the usbotg1 node. Fixes: bd306fdb4e60 ("arm64: dts: imx8mm-venice-gw71xx: fix USB OTG VBUS") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mm-venice-gw71xx.dtsi | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi index c557dbf4dcd60..2e90466db89a0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi @@ -47,17 +47,6 @@ gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; status = "okay"; }; - - reg_usb_otg1_vbus: regulator-usb-otg1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_reg_usb1_en>; - compatible = "regulator-fixed"; - regulator-name = "usb_otg1_vbus"; - gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>; - enable-active-high; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - }; }; /* off-board header */ @@ -146,9 +135,10 @@ }; &usbotg1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; dr_mode = "otg"; over-current-active-low; - vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; @@ -206,14 +196,6 @@ >; }; - pinctrl_reg_usb1_en: regusb1grp { - fsl,pins = < - MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x41 - MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141 - MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41 - >; - }; - pinctrl_spi2: spi2grp { fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 @@ -236,4 +218,11 @@ MX8MM_IOMUXC_UART3_TXD_UART3_DCE_TX 0x140 >; }; + + pinctrl_usbotg1: usbotg1grp { + fsl,pins = < + MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x141 + MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x41 + >; + }; }; -- GitLab From 992cbc89b0ffe20bc906b6ebb0dfda6353476b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 3 Mar 2023 19:54:16 +0100 Subject: [PATCH 0863/2290] pwm: atmel-hlcdc: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5fce94170ad8a67b839f3dd8e8e8a87039ba0251 ] The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Claudiu Beznea Signed-off-by: Thierry Reding Stable-dep-of: e25ac87d3f83 ("pwm: atmel-hlcdc: Fix clock imbalance related to suspend support") Signed-off-by: Sasha Levin --- drivers/pwm/pwm-atmel-hlcdc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index a43b2babc8093..96a709a9d49a8 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -278,15 +278,13 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) return 0; } -static int atmel_hlcdc_pwm_remove(struct platform_device *pdev) +static void atmel_hlcdc_pwm_remove(struct platform_device *pdev) { struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); pwmchip_remove(&chip->chip); clk_disable_unprepare(chip->hlcdc->periph_clk); - - return 0; } static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { @@ -301,7 +299,7 @@ static struct platform_driver atmel_hlcdc_pwm_driver = { .pm = &atmel_hlcdc_pwm_pm_ops, }, .probe = atmel_hlcdc_pwm_probe, - .remove = atmel_hlcdc_pwm_remove, + .remove_new = atmel_hlcdc_pwm_remove, }; module_platform_driver(atmel_hlcdc_pwm_driver); -- GitLab From 78b8952e1df233ae68e1dcc838272e65e3705232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 14 Jul 2023 22:56:15 +0200 Subject: [PATCH 0864/2290] pwm: atmel-hlcdc: Use consistent variable naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aecab554b6ffa9a94ba796031eb39ea20eb60fb3 ] In PWM drivers the variable name "chip" is usually only used for struct pwm_chip pointers. This driver however used "chip" for its driver data and pwm_chip pointers are named "chip", too, when there is no driver data around and "c" otherwise. Instead use "atmel" for driver data and always "chip" for pwm_chips. Signed-off-by: Uwe Kleine-König Reviewed-by: Claudiu Beznea [thierry.reding@gmail.com: replace ddata by atmel] Signed-off-by: Thierry Reding Stable-dep-of: e25ac87d3f83 ("pwm: atmel-hlcdc: Fix clock imbalance related to suspend support") Signed-off-by: Sasha Levin --- drivers/pwm/pwm-atmel-hlcdc.c | 65 ++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 96a709a9d49a8..4d0b859d0ac13 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -38,11 +38,11 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) return container_of(chip, struct atmel_hlcdc_pwm, chip); } -static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, +static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); - struct atmel_hlcdc *hlcdc = chip->hlcdc; + struct atmel_hlcdc_pwm *atmel = to_atmel_hlcdc_pwm(chip); + struct atmel_hlcdc *hlcdc = atmel->hlcdc; unsigned int status; int ret; @@ -54,7 +54,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, u32 pwmcfg; int pres; - if (!chip->errata || !chip->errata->slow_clk_erratum) { + if (!atmel->errata || !atmel->errata->slow_clk_erratum) { clk_freq = clk_get_rate(new_clk); if (!clk_freq) return -EINVAL; @@ -64,7 +64,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, } /* Errata: cannot use slow clk on some IP revisions */ - if ((chip->errata && chip->errata->slow_clk_erratum) || + if ((atmel->errata && atmel->errata->slow_clk_erratum) || clk_period_ns > state->period) { new_clk = hlcdc->sys_clk; clk_freq = clk_get_rate(new_clk); @@ -77,8 +77,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) { /* Errata: cannot divide by 1 on some IP revisions */ - if (!pres && chip->errata && - chip->errata->div1_clk_erratum) + if (!pres && atmel->errata && + atmel->errata->div1_clk_erratum) continue; if ((clk_period_ns << pres) >= state->period) @@ -90,7 +90,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, pwmcfg = ATMEL_HLCDC_PWMPS(pres); - if (new_clk != chip->cur_clk) { + if (new_clk != atmel->cur_clk) { u32 gencfg = 0; int ret; @@ -98,8 +98,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, if (ret) return ret; - clk_disable_unprepare(chip->cur_clk); - chip->cur_clk = new_clk; + clk_disable_unprepare(atmel->cur_clk); + atmel->cur_clk = new_clk; if (new_clk == hlcdc->sys_clk) gencfg = ATMEL_HLCDC_CLKPWMSEL; @@ -160,8 +160,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, if (ret) return ret; - clk_disable_unprepare(chip->cur_clk); - chip->cur_clk = NULL; + clk_disable_unprepare(atmel->cur_clk); + atmel->cur_clk = NULL; } return 0; @@ -183,31 +183,32 @@ static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = { #ifdef CONFIG_PM_SLEEP static int atmel_hlcdc_pwm_suspend(struct device *dev) { - struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev); + struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); /* Keep the periph clock enabled if the PWM is still running. */ - if (pwm_is_enabled(&chip->chip.pwms[0])) - clk_disable_unprepare(chip->hlcdc->periph_clk); + if (pwm_is_enabled(&atmel->chip.pwms[0])) + clk_disable_unprepare(atmel->hlcdc->periph_clk); return 0; } static int atmel_hlcdc_pwm_resume(struct device *dev) { - struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev); + struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); struct pwm_state state; int ret; - pwm_get_state(&chip->chip.pwms[0], &state); + pwm_get_state(&atmel->chip.pwms[0], &state); /* Re-enable the periph clock it was stopped during suspend. */ if (!state.enabled) { - ret = clk_prepare_enable(chip->hlcdc->periph_clk); + ret = clk_prepare_enable(atmel->hlcdc->periph_clk); if (ret) return ret; } - return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state); + return atmel_hlcdc_pwm_apply(&atmel->chip, &atmel->chip.pwms[0], + &state); } #endif @@ -244,14 +245,14 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) { const struct of_device_id *match; struct device *dev = &pdev->dev; - struct atmel_hlcdc_pwm *chip; + struct atmel_hlcdc_pwm *atmel; struct atmel_hlcdc *hlcdc; int ret; hlcdc = dev_get_drvdata(dev->parent); - chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); - if (!chip) + atmel = devm_kzalloc(dev, sizeof(*atmel), GFP_KERNEL); + if (!atmel) return -ENOMEM; ret = clk_prepare_enable(hlcdc->periph_clk); @@ -260,31 +261,31 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node); if (match) - chip->errata = match->data; + atmel->errata = match->data; - chip->hlcdc = hlcdc; - chip->chip.ops = &atmel_hlcdc_pwm_ops; - chip->chip.dev = dev; - chip->chip.npwm = 1; + atmel->hlcdc = hlcdc; + atmel->chip.ops = &atmel_hlcdc_pwm_ops; + atmel->chip.dev = dev; + atmel->chip.npwm = 1; - ret = pwmchip_add(&chip->chip); + ret = pwmchip_add(&atmel->chip); if (ret) { clk_disable_unprepare(hlcdc->periph_clk); return ret; } - platform_set_drvdata(pdev, chip); + platform_set_drvdata(pdev, atmel); return 0; } static void atmel_hlcdc_pwm_remove(struct platform_device *pdev) { - struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); + struct atmel_hlcdc_pwm *atmel = platform_get_drvdata(pdev); - pwmchip_remove(&chip->chip); + pwmchip_remove(&atmel->chip); - clk_disable_unprepare(chip->hlcdc->periph_clk); + clk_disable_unprepare(atmel->hlcdc->periph_clk); } static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = { -- GitLab From ecab386a8edb4f6b06dd089882cc3a2b0d8c0c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 26 Jan 2024 13:04:33 +0100 Subject: [PATCH 0865/2290] pwm: atmel-hlcdc: Fix clock imbalance related to suspend support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e25ac87d3f831fed002c34aadddaf4ebb4ea45ec ] The suspend callback disables the periph clock when the PWM is enabled and resume reenables this clock if the PWM was disabled before. Judging from the code comment it's suspend that is wrong here. Fix accordingly. Fixes: f9bb9da7c09d ("pwm: atmel-hlcdc: Implement the suspend/resume hooks") Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/b51ea92b0a45eff3dc83b08adefd43d930df996c.1706269232.git.u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/pwm/pwm-atmel-hlcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 4d0b859d0ac13..3e9c94a8d7f72 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -186,7 +186,7 @@ static int atmel_hlcdc_pwm_suspend(struct device *dev) struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev); /* Keep the periph clock enabled if the PWM is still running. */ - if (pwm_is_enabled(&atmel->chip.pwms[0])) + if (!pwm_is_enabled(&atmel->chip.pwms[0])) clk_disable_unprepare(atmel->hlcdc->periph_clk); return 0; -- GitLab From b4bb2291d6d4d54fea9f2c5d7b1e744ce0dac33d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 2 Feb 2024 07:13:29 -0800 Subject: [PATCH 0866/2290] net: blackhole_dev: fix build warning for ethh set but not used [ Upstream commit 843a8851e89e2e85db04caaf88d8554818319047 ] lib/test_blackhole_dev.c sets a variable that is never read, causing this following building warning: lib/test_blackhole_dev.c:32:17: warning: variable 'ethh' set but not used [-Wunused-but-set-variable] Remove the variable struct ethhdr *ethh, which is unused. Fixes: 509e56b37cc3 ("blackhole_dev: add a selftest") Signed-off-by: Breno Leitao Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- lib/test_blackhole_dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c index 4c40580a99a36..f247089d63c08 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/test_blackhole_dev.c @@ -29,7 +29,6 @@ static int __init test_blackholedev_init(void) { struct ipv6hdr *ip6h; struct sk_buff *skb; - struct ethhdr *ethh; struct udphdr *uh; int data_len; int ret; @@ -61,7 +60,7 @@ static int __init test_blackholedev_init(void) ip6h->saddr = in6addr_loopback; ip6h->daddr = in6addr_loopback; /* Ether */ - ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr)); + skb_push(skb, sizeof(struct ethhdr)); skb_set_mac_header(skb, 0); skb->protocol = htons(ETH_P_IPV6); -- GitLab From 71cdbd1fcbe7034b077ed1509f26fd5317f49130 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 2 Feb 2024 10:35:47 +0800 Subject: [PATCH 0867/2290] wifi: ath11k: initialize rx_mcs_80 and rx_mcs_160 before use [ Upstream commit b802e7b7e771dee3377d071418281f8b64d2d832 ] Currently in ath11k_peer_assoc_h_he() rx_mcs_80 and rx_mcs_160 are used to calculate max_nss, see if (support_160) max_nss = min(rx_mcs_80, rx_mcs_160); else max_nss = rx_mcs_80; Kernel test robot complains on uninitialized symbols: drivers/net/wireless/ath/ath11k/mac.c:2321 ath11k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_80'. drivers/net/wireless/ath/ath11k/mac.c:2321 ath11k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_160'. drivers/net/wireless/ath/ath11k/mac.c:2323 ath11k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_80'. This is because there are some code paths that never set them, so the assignment of max_nss can come from uninitialized variables. This could result in some unknown issues since a wrong peer_nss might be passed to firmware. Change to initialize them to an invalid value at the beginning. This makes sense because even max_nss gets an invalid value, due to either or both of them being invalid, we can get an valid peer_nss with following guard: arg->peer_nss = min(sta->deflink.rx_nss, max_nss) Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.23 Fixes: 3db26ecf7114 ("ath11k: calculate the correct NSS of peer for HE capabilities") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401311243.NyXwWZxP-lkp@intel.com/ Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240202023547.11141-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 21c6b36dc6ebb..51fc77e93de5c 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2112,6 +2112,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); + /* Initialize rx_mcs_160 to 9 which is an invalid value */ + rx_mcs_160 = 9; if (support_160) { for (i = 7; i >= 0; i--) { u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3; @@ -2123,6 +2125,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, } } + /* Initialize rx_mcs_80 to 9 which is an invalid value */ + rx_mcs_80 = 9; for (i = 7; i >= 0; i--) { u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3; -- GitLab From 4d99d267da3415db2124029cb5a6d2d955ca43f9 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 26 Jan 2024 15:53:34 +0800 Subject: [PATCH 0868/2290] wifi: libertas: fix some memleaks in lbs_allocate_cmd_buffer() [ Upstream commit 5f0e4aede01cb01fa633171f0533affd25328c3a ] In the for statement of lbs_allocate_cmd_buffer(), if the allocation of cmdarray[i].cmdbuf fails, both cmdarray and cmdarray[i].cmdbuf needs to be freed. Otherwise, there will be memleaks in lbs_allocate_cmd_buffer(). Fixes: 876c9d3aeb98 ("[PATCH] Marvell Libertas 8388 802.11b/g USB driver") Signed-off-by: Zhipeng Lu Signed-off-by: Kalle Valo Link: https://msgid.link/20240126075336.2825608-1-alexious@zju.edu.cn Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/cmd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cmd.c b/drivers/net/wireless/marvell/libertas/cmd.c index 104d2b6dc9af6..5a525da434c28 100644 --- a/drivers/net/wireless/marvell/libertas/cmd.c +++ b/drivers/net/wireless/marvell/libertas/cmd.c @@ -1132,7 +1132,7 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv) if (!cmdarray[i].cmdbuf) { lbs_deb_host("ALLOC_CMD_BUF: ptempvirtualaddr is NULL\n"); ret = -1; - goto done; + goto free_cmd_array; } } @@ -1140,8 +1140,17 @@ int lbs_allocate_cmd_buffer(struct lbs_private *priv) init_waitqueue_head(&cmdarray[i].cmdwait_q); lbs_cleanup_and_insert_cmd(priv, &cmdarray[i]); } - ret = 0; + return 0; +free_cmd_array: + for (i = 0; i < LBS_NUM_CMD_BUFFERS; i++) { + if (cmdarray[i].cmdbuf) { + kfree(cmdarray[i].cmdbuf); + cmdarray[i].cmdbuf = NULL; + } + } + kfree(priv->cmd_array); + priv->cmd_array = NULL; done: return ret; } -- GitLab From a1f57a0127b89a6b6620514564aa7eaec16d9af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Fri, 2 Feb 2024 17:42:13 +0100 Subject: [PATCH 0869/2290] wifi: wfx: fix memory leak when starting AP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b8cfb7c819dd39965136a66fe3a7fde688d976fc ] Kmemleak reported this error: unreferenced object 0xd73d1180 (size 184): comm "wpa_supplicant", pid 1559, jiffies 13006305 (age 964.245s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 1e 00 01 00 00 00 00 00 ................ backtrace: [<5ca11420>] kmem_cache_alloc+0x20c/0x5ac [<127bdd74>] __alloc_skb+0x144/0x170 [] __netdev_alloc_skb+0x50/0x180 [<0f9fa1d5>] __ieee80211_beacon_get+0x290/0x4d4 [mac80211] [<7accd02d>] ieee80211_beacon_get_tim+0x54/0x18c [mac80211] [<41e25cc3>] wfx_start_ap+0xc8/0x234 [wfx] [<93a70356>] ieee80211_start_ap+0x404/0x6b4 [mac80211] [] nl80211_start_ap+0x76c/0x9e0 [cfg80211] [<47bd8b68>] genl_rcv_msg+0x198/0x378 [<453ef796>] netlink_rcv_skb+0xd0/0x130 [<6b7c977a>] genl_rcv+0x34/0x44 [<66b2d04d>] netlink_unicast+0x1b4/0x258 [] netlink_sendmsg+0x1e8/0x428 [] ____sys_sendmsg+0x1e0/0x274 [] ___sys_sendmsg+0x80/0xb4 [<69954f45>] __sys_sendmsg+0x64/0xa8 unreferenced object 0xce087000 (size 1024): comm "wpa_supplicant", pid 1559, jiffies 13006305 (age 964.246s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 10 00 07 40 00 00 00 00 00 00 00 00 00 00 00 00 ...@............ backtrace: [<9a993714>] __kmalloc_track_caller+0x230/0x600 [] kmalloc_reserve.constprop.0+0x30/0x74 [] __alloc_skb+0xa0/0x170 [] __netdev_alloc_skb+0x50/0x180 [<0f9fa1d5>] __ieee80211_beacon_get+0x290/0x4d4 [mac80211] [<7accd02d>] ieee80211_beacon_get_tim+0x54/0x18c [mac80211] [<41e25cc3>] wfx_start_ap+0xc8/0x234 [wfx] [<93a70356>] ieee80211_start_ap+0x404/0x6b4 [mac80211] [] nl80211_start_ap+0x76c/0x9e0 [cfg80211] [<47bd8b68>] genl_rcv_msg+0x198/0x378 [<453ef796>] netlink_rcv_skb+0xd0/0x130 [<6b7c977a>] genl_rcv+0x34/0x44 [<66b2d04d>] netlink_unicast+0x1b4/0x258 [] netlink_sendmsg+0x1e8/0x428 [] ____sys_sendmsg+0x1e0/0x274 [] ___sys_sendmsg+0x80/0xb4 However, since the kernel is build optimized, it seems the stack is not accurate. It appears the issue is related to wfx_set_mfp_ap(). The issue is obvious in this function: memory allocated by ieee80211_beacon_get() is never released. Fixing this leak makes kmemleak happy. Reported-by: Ulrich Mohr Co-developed-by: Ulrich Mohr Signed-off-by: Ulrich Mohr Fixes: 268bceec1684 ("staging: wfx: fix BA when device is AP and MFP is enabled") Signed-off-by: Jérôme Pouiller Signed-off-by: Kalle Valo Link: https://msgid.link/20240202164213.1606145-1-jerome.pouiller@silabs.com Signed-off-by: Sasha Levin --- drivers/net/wireless/silabs/wfx/sta.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index 073e870b26415..871667650dbef 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -362,6 +362,7 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif) const int pairwise_cipher_suite_count_offset = 8 / sizeof(u16); const int pairwise_cipher_suite_size = 4 / sizeof(u16); const int akm_suite_size = 4 / sizeof(u16); + int ret = -EINVAL; const u16 *ptr; if (unlikely(!skb)) @@ -370,22 +371,26 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif) ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, skb->len - ieoffset); if (unlikely(!ptr)) - return -EINVAL; + goto free_skb; ptr += pairwise_cipher_suite_count_offset; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return -EINVAL; + goto free_skb; ptr += 1 + pairwise_cipher_suite_size * *ptr; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return -EINVAL; + goto free_skb; ptr += 1 + akm_suite_size * *ptr; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return -EINVAL; + goto free_skb; wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6)); - return 0; + ret = 0; + +free_skb: + dev_kfree_skb(skb); + return ret; } int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, -- GitLab From d73b916bcc62d4b745728449e6e3343dce47476f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 7 Feb 2024 14:47:00 +0106 Subject: [PATCH 0870/2290] printk: Disable passing console lock owner completely during panic() [ Upstream commit d04d5882cd678b898a9d7c5aee6afbe9e6e77fcd ] The commit d51507098ff91 ("printk: disable optimistic spin during panic") added checks to avoid becoming a console waiter if a panic is in progress. However, the transition to panic can occur while there is already a waiter. The current owner should not pass the lock to the waiter because it might get stopped or blocked anytime. Also the panic context might pass the console lock owner to an already stopped waiter by mistake. It might happen when console_flush_on_panic() ignores the current lock owner, for example: CPU0 CPU1 ---- ---- console_lock_spinning_enable() console_trylock_spinning() [CPU1 now console waiter] NMI: panic() panic_other_cpus_shutdown() [stopped as console waiter] console_flush_on_panic() console_lock_spinning_enable() [print 1 record] console_lock_spinning_disable_and_check() [handover to stopped CPU1] This results in panic() not flushing the panic messages. Fix these problems by disabling all spinning operations completely during panic(). Another advantage is that it prevents possible deadlocks caused by "console_owner_lock". The panic() context does not need to take it any longer. The lockless checks are safe because the functions become NOPs when they see the panic in progress. All operations manipulating the state are still synchronized by the lock even when non-panic CPUs would notice the panic synchronously. The current owner might stay spinning. But non-panic() CPUs would get stopped anyway and the panic context will never start spinning. Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20240207134103.1357162-12-john.ogness@linutronix.de Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index cc53fb77f77cc..981cdb00b8722 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1797,10 +1797,23 @@ static bool console_waiter; */ static void console_lock_spinning_enable(void) { + /* + * Do not use spinning in panic(). The panic CPU wants to keep the lock. + * Non-panic CPUs abandon the flush anyway. + * + * Just keep the lockdep annotation. The panic-CPU should avoid + * taking console_owner_lock because it might cause a deadlock. + * This looks like the easiest way how to prevent false lockdep + * reports without handling races a lockless way. + */ + if (panic_in_progress()) + goto lockdep; + raw_spin_lock(&console_owner_lock); console_owner = current; raw_spin_unlock(&console_owner_lock); +lockdep: /* The waiter may spin on us after setting console_owner */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); } @@ -1824,6 +1837,22 @@ static int console_lock_spinning_disable_and_check(void) { int waiter; + /* + * Ignore spinning waiters during panic() because they might get stopped + * or blocked at any time, + * + * It is safe because nobody is allowed to start spinning during panic + * in the first place. If there has been a waiter then non panic CPUs + * might stay spinning. They would get stopped anyway. The panic context + * will never start spinning and an interrupted spin on panic CPU will + * never continue. + */ + if (panic_in_progress()) { + /* Keep lockdep happy. */ + spin_release(&console_owner_dep_map, _THIS_IP_); + return 0; + } + raw_spin_lock(&console_owner_lock); waiter = READ_ONCE(console_waiter); console_owner = NULL; -- GitLab From 23f96f86de86c4416b75471b0eee3d883305b4c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 4 Feb 2024 22:20:43 +0100 Subject: [PATCH 0871/2290] pwm: sti: Fix capture for st,pwm-num-chan < st,capture-num-chan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f623835584f1c8d1030666796f40c47a448ce0b ] The driver only used the number of pwm channels to set the pwm_chip's npwm member. The result is that if there are more capture channels than PWM channels specified in the device tree, only a part of the capture channel is usable. Fix that by passing the bigger channel count to the pwm framework. This makes it possible that the .apply() callback is called with .hwpwm >= pwm_num_devs, catch that case and return an error code. Fixes: c97267ae831d ("pwm: sti: Add PWM capture callback") Link: https://lore.kernel.org/r/20240204212043.2951852-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/pwm/pwm-sti.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 652fdb8dc7bfa..0a7920cbd4949 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -395,8 +395,17 @@ out: static int sti_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { + struct sti_pwm_chip *pc = to_sti_pwmchip(chip); + struct sti_pwm_compat_data *cdata = pc->cdata; + struct device *dev = pc->dev; int err; + if (pwm->hwpwm >= cdata->pwm_num_devs) { + dev_err(dev, "device %u is not valid for pwm mode\n", + pwm->hwpwm); + return -EINVAL; + } + if (state->polarity != PWM_POLARITY_NORMAL) return -EINVAL; @@ -647,7 +656,7 @@ static int sti_pwm_probe(struct platform_device *pdev) pc->chip.dev = dev; pc->chip.ops = &sti_pwm_ops; - pc->chip.npwm = pc->cdata->pwm_num_devs; + pc->chip.npwm = max(cdata->pwm_num_devs, cdata->cpt_num_devs); for (i = 0; i < cdata->cpt_num_devs; i++) { struct sti_cpt_ddata *ddata = &cdata->ddata[i]; -- GitLab From 0697d4862d961c867e8a74ce9031e170664178a5 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 6 Feb 2024 13:46:09 +0100 Subject: [PATCH 0872/2290] tools/resolve_btfids: Refactor set sorting with types from btf_ids.h [ Upstream commit 9707ac4fe2f5bac6406d2403f8b8a64d7b3d8e43 ] Instead of using magic offsets to access BTF ID set data, leverage types from btf_ids.h (btf_id_set and btf_id_set8) which define the actual layout of the data. Thanks to this change, set sorting should also continue working if the layout changes. This requires to sync the definition of 'struct btf_id_set8' from include/linux/btf_ids.h to tools/include/linux/btf_ids.h. We don't sync the rest of the file at the moment, b/c that would require to also sync multiple dependent headers and we don't need any other defs from btf_ids.h. Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Acked-by: Daniel Xu Link: https://lore.kernel.org/bpf/ff7f062ddf6a00815fda3087957c4ce667f50532.1707223196.git.vmalik@redhat.com Stable-dep-of: 903fad439466 ("tools/resolve_btfids: Fix cross-compilation to non-host endianness") Signed-off-by: Sasha Levin --- tools/bpf/resolve_btfids/main.c | 35 ++++++++++++++++++++------------- tools/include/linux/btf_ids.h | 9 +++++++++ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 77058174082d7..cd42977c6a1f4 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ #include #define BTF_IDS_SECTION ".BTF_ids" -#define BTF_ID "__BTF_ID__" +#define BTF_ID_PREFIX "__BTF_ID__" #define BTF_STRUCT "struct" #define BTF_UNION "union" @@ -161,7 +162,7 @@ static int eprintf(int level, int var, const char *fmt, ...) static bool is_btf_id(const char *name) { - return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); + return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); } static struct btf_id *btf_id__find(struct rb_root *root, const char *name) @@ -441,7 +442,7 @@ static int symbols_collect(struct object *obj) * __BTF_ID__TYPE__vfs_truncate__0 * prefix = ^ */ - prefix = name + sizeof(BTF_ID) - 1; + prefix = name + sizeof(BTF_ID_PREFIX) - 1; /* struct */ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { @@ -649,19 +650,18 @@ static int cmp_id(const void *pa, const void *pb) static int sets_patch(struct object *obj) { Elf_Data *data = obj->efile.idlist; - int *ptr = data->d_buf; struct rb_node *next; next = rb_first(&obj->sets); while (next) { - unsigned long addr, idx; + struct btf_id_set8 *set8; + struct btf_id_set *set; + unsigned long addr, off; struct btf_id *id; - int *base; - int cnt; id = rb_entry(next, struct btf_id, rb_node); addr = id->addr[0]; - idx = addr - obj->efile.idlist_addr; + off = addr - obj->efile.idlist_addr; /* sets are unique */ if (id->addr_cnt != 1) { @@ -670,14 +670,21 @@ static int sets_patch(struct object *obj) return -1; } - idx = idx / sizeof(int); - base = &ptr[idx] + (id->is_set8 ? 2 : 1); - cnt = ptr[idx]; + if (id->is_set) { + set = data->d_buf + off; + qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); + } else { + set8 = data->d_buf + off; + /* + * Make sure id is at the beginning of the pairs + * struct, otherwise the below qsort would not work. + */ + BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); + qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", - (idx + 1) * sizeof(int), cnt, id->name); - - qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); + off, id->is_set ? set->cnt : set8->cnt, id->name); next = rb_next(next); } diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 2f882d5cb30f5..72535f00572f6 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ -- GitLab From b4907fb68d45a59cfb701ffde3da7cf92d0b9b5e Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 6 Feb 2024 13:46:10 +0100 Subject: [PATCH 0873/2290] tools/resolve_btfids: Fix cross-compilation to non-host endianness [ Upstream commit 903fad4394666bc23975c93fb58f137ce64b5192 ] The .BTF_ids section is pre-filled with zeroed BTF ID entries during the build and afterwards patched by resolve_btfids with correct values. Since resolve_btfids always writes in host-native endianness, it relies on libelf to do the translation when the target ELF is cross-compiled to a different endianness (this was introduced in commit 61e8aeda9398 ("bpf: Fix libelf endian handling in resolv_btfids")). Unfortunately, the translation will corrupt the flags fields of SET8 entries because these were written during vmlinux compilation and are in the correct endianness already. This will lead to numerous selftests failures such as: $ sudo ./test_verifier 502 502 #502/p sleepable fentry accept FAIL Failed to load prog 'Invalid argument'! bpf_fentry_test1 is not sleepable verification time 34 usec stack depth 0 processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 Summary: 0 PASSED, 0 SKIPPED, 1 FAILED Since it's not possible to instruct libelf to translate just certain values, let's manually bswap the flags (both global and entry flags) in resolve_btfids when needed, so that libelf then translates everything correctly. Fixes: ef2c6f370a63 ("tools/resolve_btfids: Add support for 8-byte BTF sets") Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/7b6bff690919555574ce0f13d2a5996cacf7bf69.1707223196.git.vmalik@redhat.com Signed-off-by: Sasha Levin --- tools/bpf/resolve_btfids/main.c | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index cd42977c6a1f4..ef0764d6891e4 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -90,6 +90,14 @@ #define ADDR_CNT 100 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATANATIVE ELFDATA2MSB +#else +# error "Unknown machine endianness!" +#endif + struct btf_id { struct rb_node rb_node; char *name; @@ -117,6 +125,7 @@ struct object { int idlist_shndx; size_t strtabidx; unsigned long idlist_addr; + int encoding; } efile; struct rb_root sets; @@ -320,6 +329,7 @@ static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; size_t shdrstrndx; + GElf_Ehdr ehdr; int idx = 0; Elf *elf; int fd; @@ -351,6 +361,13 @@ static int elf_collect(struct object *obj) return -1; } + if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { + pr_err("FAILED cannot get ELF header: %s\n", + elf_errmsg(-1)); + return -1; + } + obj->efile.encoding = ehdr.e_ident[EI_DATA]; + /* * Scan all the elf sections and look for save data * from .BTF_ids section and symbols. @@ -681,6 +698,24 @@ static int sets_patch(struct object *obj) */ BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); + + /* + * When ELF endianness does not match endianness of the + * host, libelf will do the translation when updating + * the ELF. This, however, corrupts SET8 flags which are + * already in the target endianness. So, let's bswap + * them to the host endianness and libelf will then + * correctly translate everything. + */ + if (obj->efile.encoding != ELFDATANATIVE) { + int i; + + set8->flags = bswap_32(set8->flags); + for (i = 0; i < set8->cnt; i++) { + set8->pairs[i].flags = + bswap_32(set8->pairs[i].flags); + } + } } pr_debug("sorting addr %5lu: cnt %6d [%s]\n", -- GitLab From 437af288ec7bbe5fe46b9c1e37fcb66c1859e228 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 18:02:09 +0200 Subject: [PATCH 0874/2290] wifi: iwlwifi: mvm: don't set replay counters to 0xff [ Upstream commit d5bd4041cd70faf26fc9a54bd6f172537bbe77f3 ] The firmware (later) actually uses the values even for keys that are invalid as far as the host is concerned, later in rekeying, and then only sets the low 48 bits since the PNs are only 48 bits over the air. It does, however, compare the full 64 bits later, obviously causing problems. Remove the memset and use kzalloc instead to avoid any old heap data leaking to the firmware. We already init all the other fields in the struct anyway. This leaves the data set to zero for any unused fields, so the firmware can look at them safely even if they're not used right now. Fixes: 79e561f0f05a ("iwlwifi: mvm: d3: implement RSC command version 5") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206175739.462101146fef.I10f3855b99417af4247cff04af78dcbc6cb75c9c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 2748459d12279..88f4f429d875c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -461,12 +461,10 @@ static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm, struct wowlan_key_rsc_v5_data data = {}; int i; - data.rsc = kmalloc(sizeof(*data.rsc), GFP_KERNEL); + data.rsc = kzalloc(sizeof(*data.rsc), GFP_KERNEL); if (!data.rsc) return -ENOMEM; - memset(data.rsc, 0xff, sizeof(*data.rsc)); - for (i = 0; i < ARRAY_SIZE(data.rsc->mcast_key_id_map); i++) data.rsc->mcast_key_id_map[i] = IWL_MCAST_KEY_MAP_INVALID; -- GitLab From 8a2f812b4bfb67821489935105ca294db601f853 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 18 Jan 2024 13:03:39 +0100 Subject: [PATCH 0875/2290] s390/pai: fix attr_event_free upper limit for pai device drivers [ Upstream commit 225d09d6e5f3870560665a1829d2db79330b4c58 ] When the device drivers are initialized, a sysfs directory is created. This contains many attributes which are allocated with kzalloc(). Should it fail, the memory for the attributes already created is freed in attr_event_free(). Its second parameter is number of attribute elements to delete. This parameter is off by one. When i. e. the 10th attribute fails to get created, attributes numbered 0 to 9 should be deleted. Currently only attributes numbered 0 to 8 are deleted. Fixes: 39d62336f5c1 ("s390/pai: add support for cryptography counters") Reported-by: Sumanth Korikkar Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_pai_crypto.c | 2 +- arch/s390/kernel/perf_pai_ext.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 6826e2a69a216..f61a652046cfb 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -647,7 +647,7 @@ static int __init attr_event_init(void) for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) { ret = attr_event_init_one(attrs, i); if (ret) { - attr_event_free(attrs, i - 1); + attr_event_free(attrs, i); return ret; } } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 74b53c531e0cd..b4d89654183a2 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -612,7 +612,7 @@ static int __init attr_event_init(void) for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { ret = attr_event_init_one(attrs, i); if (ret) { - attr_event_free(attrs, i - 1); + attr_event_free(attrs, i); return ret; } } -- GitLab From 9b2ca91f6eac4488385454601531a98933f1da5a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Jan 2024 20:14:28 -0700 Subject: [PATCH 0876/2290] s390/vdso: drop '-fPIC' from LDFLAGS [ Upstream commit 0628c03934187be33942580e10bb9afcc61adeed ] '-fPIC' as an option to the linker does not do what it seems like it should. With ld.bfd, it is treated as '-f PIC', which does not make sense based on the meaning of '-f': -f SHLIB, --auxiliary SHLIB Auxiliary filter for shared object symbol table When building with ld.lld (currently under review in a GitHub pull request), it just errors out because '-f' means nothing and neither does '-fPIC': ld.lld: error: unknown argument '-fPIC' '-fPIC' was blindly copied from CFLAGS when the vDSO stopped being linked with '$(CC)', it should not be needed. Remove it to clear up the build failure with ld.lld. Fixes: 2b2a25845d53 ("s390/vdso: Use $(LD) instead of $(CC) to link vDSO") Link: https://github.com/llvm/llvm-project/pull/75643 Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Link: https://lore.kernel.org/r/20240130-s390-vdso-drop-fpic-from-ldflags-v1-1-094ad104fc55@kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/vdso32/Makefile | 2 +- arch/s390/kernel/vdso64/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 245bddfe9bc0e..cc513add48eb5 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ +LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \ --hash-style=both --build-id=sha1 -melf_s390 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 1605ba45ac4c0..42d918d50a1ff 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -26,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ +ldflags-y := -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) -- GitLab From 4e58093897c90253752b9332bd85814c73c9e98a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 26 Jan 2024 17:36:16 +0100 Subject: [PATCH 0877/2290] selftests: forwarding: Add missing config entries [ Upstream commit 4acf4e62cd572b0c806035046b3698f5585ab821 ] The config file contains a partial kernel configuration to be used by `virtme-configkernel --custom'. The presumption is that the config file contains all Kconfig options needed by the selftests from the directory. In net/forwarding/config, many are missing, which manifests as spurious failures when running the selftests, with messages about unknown device types, qdisc kinds or classifier actions. Add the missing configurations. Tested the resulting configuration using virtme-ng as follows: # vng -b -f tools/testing/selftests/net/forwarding/config # vng --user root (within the VM:) # make -C tools/testing/selftests TARGETS=net/forwarding run_tests Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/025abded7ff9cea5874a7fe35dcd3fd41bf5e6ac.1706286755.git.petrm@nvidia.com Signed-off-by: Paolo Abeni Stable-dep-of: f0ddf15f0a74 ("selftests: forwarding: Add missing multicast routing config entries") Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/config | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 697994a9278bb..ba23435145827 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,14 +6,42 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_IPV6=y +CONFIG_IPV6_GRE=m +CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_META=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPIP=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_VETH=m CONFIG_NAMESPACES=y CONFIG_NET_NS=y +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m -- GitLab From 21af11fcb03f92decc0e0701b429f92044138129 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 18:55:38 +0200 Subject: [PATCH 0878/2290] selftests: forwarding: Add missing multicast routing config entries [ Upstream commit f0ddf15f0a74c27eb4b2271a90e69948acc3fa2c ] The two tests that make use of multicast routig (router.sh and router_multicast.sh) are currently failing in the netdev CI because the kernel is missing multicast routing support. Fix by adding the required config entries. Fixes: 6d4efada3b82 ("selftests: forwarding: Add multicast routing test") Signed-off-by: Ido Schimmel Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208165538.1303021-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index ba23435145827..8d7a1a004b7c3 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -9,6 +9,13 @@ CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_IPV6=y CONFIG_IPV6_GRE=m +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m -- GitLab From a03ede2282ebbd181bd6f5c38cbfcb5765afcd04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:30:56 +0000 Subject: [PATCH 0879/2290] ipv6: mcast: remove one synchronize_net() barrier in ipv6_mc_down() [ Upstream commit 17ef8efc00b34918b966388b2af0993811895a8c ] As discussed in the past (commit 2d3916f31891 ("ipv6: fix skb drops in igmp6_event_query() and igmp6_event_report()")) I think the synchronize_net() call in ipv6_mc_down() is not needed. Under load, synchronize_net() can last between 200 usec and 5 ms. KASAN seems to agree as well. Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") Signed-off-by: Eric Dumazet Cc: Taehee Yoo Cc: Cong Wang Cc: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/mcast.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 566f3b7b957e9..a777695389403 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2722,7 +2722,6 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Should stop work after group drop. or we will * start work again in mld_ifc_event() */ - synchronize_net(); mld_query_stop_work(idev); mld_report_stop_work(idev); -- GitLab From ef71a93eec0608d744b246431eb3f4723e0294c0 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 27 May 2022 12:53:54 +0800 Subject: [PATCH 0880/2290] arm64: dts: mt8183: kukui: Split out keyboard node and describe detachables [ Upstream commit 82492c4ef8f65f93cd4a35c4b52518935acbb2fa ] Kukui devices krane, kodana, and kakadu use detachable keyboards, which only have switches to be registered. Change the keyboard node's compatible of those boards to the newly introduced "google,cros-ec-keyb-switches", which won't include matrix properties. Signed-off-by: Hsin-Yi Wang Link: https://lore.kernel.org/r/20220527045353.2483042-1-hsinyi@chromium.org Signed-off-by: Matthias Brugger Stable-dep-of: 04bd6411f506 ("arm64: dts: mt8183: Move CrosEC base detection node to kukui-based DTs") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 ++ arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi | 6 ++++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi | 6 ++++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi | 6 ++++++ arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 - 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index dccf367c7ec6c..3d95625f1b0b4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -4,6 +4,8 @@ */ #include "mt8183-kukui.dtsi" +/* Must come after mt8183-kukui.dtsi to modify cros_ec */ +#include / { panel: panel { diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi index 50a0dd36b5fb3..a11adeb29b1f2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi @@ -372,6 +372,12 @@ }; }; +&cros_ec { + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_KAKADU"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi index 06f8c80bf5536..4864c39e53a4f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi @@ -339,6 +339,12 @@ }; }; +&cros_ec { + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "GO_KODAMA"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi index a7b0cb3ff7b0a..d5f41c6c98814 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi @@ -343,6 +343,12 @@ }; }; +&cros_ec { + keyboard-controller { + compatible = "google,cros-ec-keyb-switches"; + }; +}; + &qca_wifi { qcom,ath10k-calibration-variant = "LE_Krane"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index a428a581c93a8..de610874a9125 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -999,5 +999,4 @@ }; }; -#include #include -- GitLab From 6606534538e92d0fba40ef806a5e938b5f6355e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Tue, 16 Jan 2024 18:38:34 -0300 Subject: [PATCH 0881/2290] arm64: dts: mt8183: Move CrosEC base detection node to kukui-based DTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 04bd6411f506357fd1faedc2b2156e7ef206aa9a ] The cbas node is used to describe base detection functionality in the ChromeOS EC, which is used for units that have a detachable keyboard and thus rely on this functionality to switch between tablet and laptop mode. Despite the original commit having added the cbas node to the mt8183-kukui.dtsi, not all machines that include it are detachables. In fact all machines that include from mt8183-kukui-jacuzzi.dtsi are either clamshells (ie normal laptops) or convertibles, meaning the keyboard can be flipped but not detached. The detection for the keyboard getting flipped is handled by the driver bound to the keyboard-controller node in the EC. Move the base detection node from the base kukui dtsi to the dtsis where all machines are detachables, and thus actually make use of the node. Fixes: 4fa8492d1e5b ("arm64: dts: mt8183: add cbas node under cros_ec") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240116-mt8183-kukui-cbas-remove-v3-1-055e21406e86@collabora.com Signed-off-by: Matthias Brugger Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi | 4 ++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi | 4 ++++ arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi | 4 ++++ arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 4 ---- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi index a11adeb29b1f2..0d3c7b8162ff0 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kakadu.dtsi @@ -373,6 +373,10 @@ }; &cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + keyboard-controller { compatible = "google,cros-ec-keyb-switches"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi index 4864c39e53a4f..e73113cb51f53 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-kodama.dtsi @@ -340,6 +340,10 @@ }; &cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + keyboard-controller { compatible = "google,cros-ec-keyb-switches"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi index d5f41c6c98814..181da69d18f46 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-krane.dtsi @@ -344,6 +344,10 @@ }; &cros_ec { + cbas { + compatible = "google,cros-cbas"; + }; + keyboard-controller { compatible = "google,cros-ec-keyb-switches"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index de610874a9125..1db97d94658b9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -896,10 +896,6 @@ google,usb-port-id = <0>; }; - cbas { - compatible = "google,cros-cbas"; - }; - typec { compatible = "google,cros-ec-typec"; #address-cells = <1>; -- GitLab From 5c77447aa44479aee694be4e099730643c29bcf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 1 Jan 2024 19:20:40 +0100 Subject: [PATCH 0882/2290] arm64: dts: mediatek: mt7986: add "#reset-cells" to infracfg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d993daff5962b2dd08f32a83bb1c0e5fa75732ea ] MT7986's Infrastructure System Configuration Controller includes reset controller. It can reset blocks as specified in the include/dt-bindings/reset/mt7986-resets.h . Add #reset-cells so it can be referenced properly. This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: infracfg@10001000: '#reset-cells' is a required property from schema $id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml# Fixes: 1f9986b258c2 ("arm64: dts: mediatek: add clock support for mt7986a") Cc: Sam Shih Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20240101182040.28538-2-zajec5@gmail.com Signed-off-by: Matthias Brugger Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index fc338bd497f51..108931e796465 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -110,6 +110,7 @@ compatible = "mediatek,mt7986-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; wed_pcie: wed-pcie@10003000 { -- GitLab From fedd55b8a5819c2ae086b0f765fd92b79cdb0406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 7 Feb 2024 15:08:42 -0500 Subject: [PATCH 0883/2290] arm64: dts: mediatek: mt8192-asurada: Remove CrosEC base detection node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b49cabe631b0a25aaf8fc2ba81b5b9ea6ff01b7 ] The commit adding the ChromeOS EC to the Asurada Devicetree mistakenly added a base detection node. While tablet mode detection is supported by CrosEC and used by Hayato, it is done through the cros-ec-keyb driver. The base detection node, which is handled by the hid-google-hammer driver, also provides tablet mode detection but by checking base attachment status on the CrosEC, which is not supported for Asurada. Hence, remove the unused CrosEC base detection node for Asurada. Fixes: eb188a2aaa82 ("arm64: dts: mediatek: asurada: Add ChromeOS EC") Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240207-mt8192-asurada-cbas-remove-v1-1-04cb65951975@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 50367da93cd79..c6080af1e4a30 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -819,10 +819,6 @@ #address-cells = <1>; #size-cells = <0>; - base_detection: cbas { - compatible = "google,cros-cbas"; - }; - cros_ec_pwm: pwm { compatible = "google,cros-ec-pwm"; #pwm-cells = <1>; -- GitLab From 4bc2befb93d181b0565f4cfb2ca26b5caf247952 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 28 Dec 2023 13:32:42 +0200 Subject: [PATCH 0884/2290] arm64: dts: mediatek: mt8192: fix vencoder clock name [ Upstream commit 76aac0f2a46847ed4a7a4fdd848dd66023c19ad1 ] Clock name should be `venc_sel` as per binding. Fix the warning message : arch/arm64/boot/dts/mediatek/mt8192-asurada-hayato-r1.dtb: vcodec@17020000: clock-names:0: 'venc_sel' was expected from schema $id: http://devicetree.org/schemas/media/mediatek,vcodec-encoder.yaml# Fixes: aa8f3711fc87 ("arm64: dts: mt8192: Add H264 venc device node") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231228113245.174706-4-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 2f40c6cc407c1..4ed8a0f187583 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1539,7 +1539,7 @@ mediatek,scp = <&scp>; power-domains = <&spm MT8192_POWER_DOMAIN_VENC>; clocks = <&vencsys CLK_VENC_SET1_VENC>; - clock-names = "venc-set1"; + clock-names = "venc_sel"; assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>; assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL_D4>; }; -- GitLab From 1e33bdd0239487ae5a414a5854b44e272a6cc875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 22 Jan 2024 14:23:57 +0100 Subject: [PATCH 0885/2290] arm64: dts: mediatek: mt7622: add missing "device_type" to memory nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 99d100e00144bc01b49a697f4bc4398f2f7e7ce4 ] This fixes: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: /: memory@40000000: 'device_type' is a required property from schema $id: http://devicetree.org/schemas/memory.yaml# arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dtb: /: memory@40000000: 'device_type' is a required property from schema $id: http://devicetree.org/schemas/memory.yaml# Signed-off-by: Rafał Miłecki Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240122132357.31264-1-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 1 + arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 2c35ed0734a47..b1ddc491d2936 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -74,6 +74,7 @@ memory@40000000 { reg = <0 0x40000000 0 0x40000000>; + device_type = "memory"; }; reg_1p8v: regulator-1p8v { diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index f9313b697ac12..527dcb279ba52 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -56,6 +56,7 @@ memory@40000000 { reg = <0 0x40000000 0 0x20000000>; + device_type = "memory"; }; reg_1p8v: regulator-1p8v { -- GitLab From 8bfc6b840a9542f8d5ba00a710a60e6387d272d5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 6 Feb 2024 23:01:02 -0800 Subject: [PATCH 0886/2290] bpf: Mark bpf_spin_{lock,unlock}() helpers with notrace correctly [ Upstream commit 178c54666f9c4d2f49f2ea661d0c11b52f0ed190 ] Currently tracing is supposed not to allow for bpf_spin_{lock,unlock}() helper calls. This is to prevent deadlock for the following cases: - there is a prog (prog-A) calling bpf_spin_{lock,unlock}(). - there is a tracing program (prog-B), e.g., fentry, attached to bpf_spin_lock() and/or bpf_spin_unlock(). - prog-B calls bpf_spin_{lock,unlock}(). For such a case, when prog-A calls bpf_spin_{lock,unlock}(), a deadlock will happen. The related source codes are below in kernel/bpf/helpers.c: notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) notrace is supposed to prevent fentry prog from attaching to bpf_spin_{lock,unlock}(). But actually this is not the case and fentry prog can successfully attached to bpf_spin_lock(). Siddharth Chintamaneni reported the issue in [1]. The following is the macro definition for above BPF_CALL_1: #define BPF_CALL_x(x, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) #define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) The notrace attribute is actually applied to the static always_inline function ____bpf_spin_{lock,unlock}(). The actual callback function bpf_spin_{lock,unlock}() is not marked with notrace, hence allowing fentry prog to attach to two helpers, and this may cause the above mentioned deadlock. Siddharth Chintamaneni actually has a reproducer in [2]. To fix the issue, a new macro NOTRACE_BPF_CALL_1 is introduced which will add notrace attribute to the original function instead of the hidden always_inline function and this fixed the problem. [1] https://lore.kernel.org/bpf/CAE5sdEigPnoGrzN8WU7Tx-h-iFuMZgW06qp0KHWtpvoXxf1OAQ@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CAE5sdEg6yUc_Jz50AnUXEEUh6O73yQ1Z6NV2srJnef0ZrQkZew@mail.gmail.com/ Fixes: d83525ca62cf ("bpf: introduce bpf_spin_lock") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240207070102.335167-1-yonghong.song@linux.dev Signed-off-by: Sasha Levin --- include/linux/filter.h | 21 ++++++++++++--------- kernel/bpf/helpers.c | 4 ++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed0..face590b24e17 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -495,24 +495,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 83f8f67e933df..758510b46d87b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -328,7 +328,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) __this_cpu_write(irqsave_flags, flags); } -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) { __bpf_spin_lock_irqsave(lock); return 0; @@ -350,7 +350,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) local_irq_restore(flags); } -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) { __bpf_spin_unlock_irqrestore(lock); return 0; -- GitLab From e2fed151d53d061495891e75681bf8eccb65b33e Mon Sep 17 00:00:00 2001 From: Craig Tatlor Date: Sat, 10 Feb 2024 17:45:40 +0100 Subject: [PATCH 0887/2290] ARM: dts: qcom: msm8974: correct qfprom node size [ Upstream commit 724c4bf0e4bf81dba77736afb93964c986c3c123 ] The qfprom actually is bigger than 0x1000, so adjust the reg. Note that the non-ECC-corrected qfprom can be found at 0xfc4b8000 (-0x4000). The current reg points to the ECC-corrected qfprom block which should have equivalent values at all offsets compared to the non-corrected version. [luca@z3ntu.xyz: extract to standalone patch and adjust for review comments] Fixes: c59ffb519357 ("arm: dts: msm8974: Add thermal zones, tsens and qfprom nodes") Signed-off-by: Craig Tatlor Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240210-msm8974-qfprom-v3-1-26c424160334@z3ntu.xyz Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-msm8974.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index c4b2e9ac24940..5ea45e486ed54 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1134,7 +1134,7 @@ qfprom: qfprom@fc4bc000 { compatible = "qcom,msm8974-qfprom", "qcom,qfprom"; - reg = <0xfc4bc000 0x1000>; + reg = <0xfc4bc000 0x2100>; #address-cells = <1>; #size-cells = <1>; tsens_calib: calib@d0 { -- GitLab From a9545af2a533739ffb64d6c9a6fec6f13e2b505f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Mon, 12 Feb 2024 13:57:37 +0100 Subject: [PATCH 0888/2290] wifi: wilc1000: prevent use-after-free on vif when cleaning up all interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb5942b77c05d54310a0420cac12935e9b6aa21c ] wilc_netdev_cleanup currently triggers a KASAN warning, which can be observed on interface registration error path, or simply by removing the module/unbinding device from driver: echo spi0.1 > /sys/bus/spi/drivers/wilc1000_spi/unbind ================================================================== BUG: KASAN: slab-use-after-free in wilc_netdev_cleanup+0x508/0x5cc Read of size 4 at addr c54d1ce8 by task sh/86 CPU: 0 PID: 86 Comm: sh Not tainted 6.8.0-rc1+ #117 Hardware name: Atmel SAMA5 unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x34/0x58 dump_stack_lvl from print_report+0x154/0x500 print_report from kasan_report+0xac/0xd8 kasan_report from wilc_netdev_cleanup+0x508/0x5cc wilc_netdev_cleanup from wilc_bus_remove+0xc8/0xec wilc_bus_remove from spi_remove+0x8c/0xac spi_remove from device_release_driver_internal+0x434/0x5f8 device_release_driver_internal from unbind_store+0xbc/0x108 unbind_store from kernfs_fop_write_iter+0x398/0x584 kernfs_fop_write_iter from vfs_write+0x728/0xf88 vfs_write from ksys_write+0x110/0x1e4 ksys_write from ret_fast_syscall+0x0/0x1c [...] Allocated by task 1: kasan_save_track+0x30/0x5c __kasan_kmalloc+0x8c/0x94 __kmalloc_node+0x1cc/0x3e4 kvmalloc_node+0x48/0x180 alloc_netdev_mqs+0x68/0x11dc alloc_etherdev_mqs+0x28/0x34 wilc_netdev_ifc_init+0x34/0x8ec wilc_cfg80211_init+0x690/0x910 wilc_bus_probe+0xe0/0x4a0 spi_probe+0x158/0x1b0 really_probe+0x270/0xdf4 __driver_probe_device+0x1dc/0x580 driver_probe_device+0x60/0x140 __driver_attach+0x228/0x5d4 bus_for_each_dev+0x13c/0x1a8 bus_add_driver+0x2a0/0x608 driver_register+0x24c/0x578 do_one_initcall+0x180/0x310 kernel_init_freeable+0x424/0x484 kernel_init+0x20/0x148 ret_from_fork+0x14/0x28 Freed by task 86: kasan_save_track+0x30/0x5c kasan_save_free_info+0x38/0x58 __kasan_slab_free+0xe4/0x140 kfree+0xb0/0x238 device_release+0xc0/0x2a8 kobject_put+0x1d4/0x46c netdev_run_todo+0x8fc/0x11d0 wilc_netdev_cleanup+0x1e4/0x5cc wilc_bus_remove+0xc8/0xec spi_remove+0x8c/0xac device_release_driver_internal+0x434/0x5f8 unbind_store+0xbc/0x108 kernfs_fop_write_iter+0x398/0x584 vfs_write+0x728/0xf88 ksys_write+0x110/0x1e4 ret_fast_syscall+0x0/0x1c [...] David Mosberger-Tan initial investigation [1] showed that this use-after-free is due to netdevice unregistration during vif list traversal. When unregistering a net device, since the needs_free_netdev has been set to true during registration, the netdevice object is also freed, and as a consequence, the corresponding vif object too, since it is attached to it as private netdevice data. The next occurrence of the loop then tries to access freed vif pointer to the list to move forward in the list. Fix this use-after-free thanks to two mechanisms: - navigate in the list with list_for_each_entry_safe, which allows to safely modify the list as we go through each element. For each element, remove it from the list with list_del_rcu - make sure to wait for RCU grace period end after each vif removal to make sure it is safe to free the corresponding vif too (through unregister_netdev) Since we are in a RCU "modifier" path (not a "reader" path), and because such path is expected not to be concurrent to any other modifier (we are using the vif_mutex lock), we do not need to use RCU list API, that's why we can benefit from list_for_each_entry_safe. [1] https://lore.kernel.org/linux-wireless/ab077dbe58b1ea5de0a3b2ca21f275a07af967d2.camel@egauge.net/ Fixes: 8399918f3056 ("staging: wilc1000: use RCU list to maintain vif interfaces list") Signed-off-by: Alexis Lothoré Signed-off-by: Kalle Valo Link: https://msgid.link/20240212-wilc_rework_deinit-v1-1-9203ae56c27f@bootlin.com Signed-off-by: Sasha Levin --- .../net/wireless/microchip/wilc1000/netdev.c | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index 0e6eeeed2e086..b714da48eaa17 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -878,8 +878,7 @@ static const struct net_device_ops wilc_netdev_ops = { void wilc_netdev_cleanup(struct wilc *wilc) { - struct wilc_vif *vif; - int srcu_idx, ifc_cnt = 0; + struct wilc_vif *vif, *vif_tmp; if (!wilc) return; @@ -889,32 +888,19 @@ void wilc_netdev_cleanup(struct wilc *wilc) wilc->firmware = NULL; } - srcu_idx = srcu_read_lock(&wilc->srcu); - list_for_each_entry_rcu(vif, &wilc->vif_list, list) { + list_for_each_entry_safe(vif, vif_tmp, &wilc->vif_list, list) { + mutex_lock(&wilc->vif_mutex); + list_del_rcu(&vif->list); + wilc->vif_num--; + mutex_unlock(&wilc->vif_mutex); + synchronize_srcu(&wilc->srcu); if (vif->ndev) unregister_netdev(vif->ndev); } - srcu_read_unlock(&wilc->srcu, srcu_idx); wilc_wfi_deinit_mon_interface(wilc, false); destroy_workqueue(wilc->hif_workqueue); - while (ifc_cnt < WILC_NUM_CONCURRENT_IFC) { - mutex_lock(&wilc->vif_mutex); - if (wilc->vif_num <= 0) { - mutex_unlock(&wilc->vif_mutex); - break; - } - vif = wilc_get_wl_to_vif(wilc); - if (!IS_ERR(vif)) - list_del_rcu(&vif->list); - - wilc->vif_num--; - mutex_unlock(&wilc->vif_mutex); - synchronize_srcu(&wilc->srcu); - ifc_cnt++; - } - wilc_wlan_cfg_deinit(wilc); wlan_deinit_locks(wilc); wiphy_unregister(wilc->wiphy); -- GitLab From fad9bcd4d754cc689c19dc04d2c44b82c1a5d6c8 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 13 Feb 2024 01:41:58 +0100 Subject: [PATCH 0889/2290] ACPI: processor_idle: Fix memory leak in acpi_processor_power_exit() [ Upstream commit e18afcb7b2a12b635ac10081f943fcf84ddacc51 ] After unregistering the CPU idle device, the memory associated with it is not freed, leading to a memory leak: unreferenced object 0xffff896282f6c000 (size 1024): comm "swapper/0", pid 1, jiffies 4294893170 hex dump (first 32 bytes): 00 00 00 00 0b 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 8836a742): [] kmalloc_trace+0x29d/0x340 [] acpi_processor_power_init+0xf3/0x1c0 [] __acpi_processor_start+0xd3/0xf0 [] acpi_processor_start+0x2c/0x50 [] really_probe+0xe2/0x480 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1f/0x90 [] __driver_attach+0xce/0x1c0 [] bus_for_each_dev+0x70/0xc0 [] bus_add_driver+0x112/0x210 [] driver_register+0x55/0x100 [] acpi_processor_driver_init+0x3b/0xc0 [] do_one_initcall+0x41/0x300 [] kernel_init_freeable+0x320/0x470 [] kernel_init+0x16/0x1b0 [] ret_from_fork+0x2d/0x50 Fix this by freeing the CPU idle device after unregistering it. Fixes: 3d339dcbb56d ("cpuidle / ACPI : move cpuidle_device field out of the acpi_processor_power structure") Signed-off-by: Armin Wolf Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/processor_idle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index fc5b5b2c9e819..6f613eef28879 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1431,6 +1431,8 @@ int acpi_processor_power_exit(struct acpi_processor *pr) acpi_processor_registered--; if (acpi_processor_registered == 0) cpuidle_unregister_driver(&acpi_idle_driver); + + kfree(dev); } pr->flags.power_setup_done = 0; -- GitLab From 031d2acc4217dc953769f2c324787b39a8aebbaa Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 16 Feb 2024 10:02:37 +0000 Subject: [PATCH 0890/2290] bus: tegra-aconnect: Update dependency to ARCH_TEGRA [ Upstream commit 4acd21a45c1446277e2abaece97d7fa7c2e692a9 ] Update the architecture dependency to be the generic Tegra because the driver works on the four latest Tegra generations not just Tegra210, if you build a kernel with a specific ARCH_TEGRA_xxx_SOC option that excludes Tegra210 you don't get this driver. Fixes: 46a88534afb59 ("bus: Add support for Tegra ACONNECT") Signed-off-by: Peter Robinson Cc: Jon Hunter Cc: Thierry Reding Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/bus/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 7bfe998f3514a..bdc7633905504 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -186,11 +186,12 @@ config SUNXI_RSB config TEGRA_ACONNECT tristate "Tegra ACONNECT Bus Driver" - depends on ARCH_TEGRA_210_SOC + depends on ARCH_TEGRA depends on OF && PM help Driver for the Tegra ACONNECT bus which is used to interface with - the devices inside the Audio Processing Engine (APE) for Tegra210. + the devices inside the Audio Processing Engine (APE) for + Tegra210 and later. config TEGRA_GMI tristate "Tegra Generic Memory Interface bus driver" -- GitLab From 386c2487754ffd6f7db0f8bee2c5dc4019385443 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 22 Jan 2024 17:34:00 -0600 Subject: [PATCH 0891/2290] iommu/amd: Mark interrupt as managed [ Upstream commit 0feda94c868d396fac3b3cb14089d2d989a07c72 ] On many systems that have an AMD IOMMU the following sequence of warnings is observed during bootup. ``` pci 0000:00:00.2 can't derive routing for PCI INT A pci 0000:00:00.2: PCI INT A: not connected ``` This series of events happens because of the IOMMU initialization sequence order and the lack of _PRT entries for the IOMMU. During initialization the IOMMU driver first enables the PCI device using pci_enable_device(). This will call acpi_pci_irq_enable() which will check if the interrupt is declared in a PCI routing table (_PRT) entry. According to the PCI spec [1] these routing entries are only required under PCI root bridges: The _PRT object is required under all PCI root bridges The IOMMU is directly connected to the root complex, so there is no parent bridge to look for a _PRT entry. The first warning is emitted since no entry could be found in the hierarchy. The second warning is then emitted because the interrupt hasn't yet been configured to any value. The pin was configured in pci_read_irq() but the byte in PCI_INTERRUPT_LINE return 0xff which means "Unknown". After that sequence of events pci_enable_msi() is called and this will allocate an interrupt. That is both of these warnings are totally harmless because the IOMMU uses MSI for interrupts. To avoid even trying to probe for a _PRT entry mark the IOMMU as IRQ managed. This avoids both warnings. Link: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/06_Device_Configuration/Device_Configuration.html?highlight=_prt#prt-pci-routing-table [1] Signed-off-by: Mario Limonciello Fixes: cffe0a2b5a34 ("x86, irq: Keep balance of IOAPIC pin reference count") Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240122233400.1802-1-mario.limonciello@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f6e64c9858021..cc94ac6662339 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2047,6 +2047,9 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) /* Prevent binding other PCI device drivers to IOMMU devices */ iommu->dev->match_driver = false; + /* ACPI _PRT won't have an IRQ for IOMMU */ + iommu->dev->irq_managed = 1; + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); -- GitLab From 093cec79f0bbbe59e2ac14e37709d1d094351954 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:05:37 +0100 Subject: [PATCH 0892/2290] wifi: brcmsmac: avoid function pointer casts [ Upstream commit e1ea6db35fc3ba5ff063f097385e9f7a88c25356 ] An old cleanup went a little too far and causes a warning with clang-16 and higher as it breaks control flow integrity (KCFI) rules: drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c:64:34: error: cast from 'void (*)(struct brcms_phy *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 64 | brcms_init_timer(physhim->wl, (void (*)(void *))fn, | ^~~~~~~~~~~~~~~~~~~~ Change this one instance back to passing a void pointer so it can be used with the timer callback interface. Fixes: d89a4c80601d ("staging: brcm80211: removed void * from softmac phy") Signed-off-by: Arnd Bergmann Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240213100548.457854-1-arnd@kernel.org Signed-off-by: Sasha Levin --- .../net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c | 3 ++- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c | 5 ++--- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c index ccc621b8ed9f2..4a1fe982a948e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c @@ -383,8 +383,9 @@ struct shared_phy *wlc_phy_shared_attach(struct shared_phy_params *shp) return sh; } -static void wlc_phy_timercb_phycal(struct brcms_phy *pi) +static void wlc_phy_timercb_phycal(void *ptr) { + struct brcms_phy *pi = ptr; uint delay = 5; if (PHY_PERICAL_MPHASE_PENDING(pi)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c index a0de5db0cd646..b723817915365 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.c @@ -57,12 +57,11 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim) } struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim, - void (*fn)(struct brcms_phy *pi), + void (*fn)(void *pi), void *arg, const char *name) { return (struct wlapi_timer *) - brcms_init_timer(physhim->wl, (void (*)(void *))fn, - arg, name); + brcms_init_timer(physhim->wl, fn, arg, name); } void wlapi_free_timer(struct wlapi_timer *t) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h index dd8774717adee..27d0934e600ed 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy_shim.h @@ -131,7 +131,7 @@ void wlc_phy_shim_detach(struct phy_shim_info *physhim); /* PHY to WL utility functions */ struct wlapi_timer *wlapi_init_timer(struct phy_shim_info *physhim, - void (*fn)(struct brcms_phy *pi), + void (*fn)(void *pi), void *arg, const char *name); void wlapi_free_timer(struct wlapi_timer *t); void wlapi_add_timer(struct wlapi_timer *t, uint ms, int periodic); -- GitLab From f95febbffe98601b748eda9eae3eec082b1c39e4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 8 Jan 2024 14:12:15 +0100 Subject: [PATCH 0893/2290] arm64: dts: qcom: sdm845-db845c: correct PCIe wake-gpios [ Upstream commit 584a327c5cffc36369b2a8953d9448826240f1ac ] Bindings allow a "wake", not "enable", GPIO. Schematics also use WAKE name for the pin: sdm845-db845c.dtb: pcie@1c00000: Unevaluated properties are not allowed ('enable-gpio' was unexpected) Fixes: 4a657c264b78 ("arm64: dts: qcom: db845c: Enable PCIe controllers") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240108131216.53867-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 135ff4368c4a6..5c04c91b0ee2b 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -532,7 +532,7 @@ &pcie0 { status = "okay"; perst-gpios = <&tlmm 35 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 134 GPIO_ACTIVE_HIGH>; + wake-gpios = <&tlmm 134 GPIO_ACTIVE_HIGH>; vddpe-3v3-supply = <&pcie0_3p3v_dual>; -- GitLab From 9c23056893a4e359146c392f70808dc4362fa299 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Nov 2023 17:42:26 +0100 Subject: [PATCH 0894/2290] arm64: dts: qcom: sm8150: use 'gpios' suffix for PCI GPIOs [ Upstream commit af6f6778d34cb40e60368e288767f674cc0c5f60 ] Linux handles both versions, but bindings expect GPIO properties to have 'gpios' suffix instead of 'gpio': sa8155p-adp.dtb: pci@1c00000: Unevaluated properties are not allowed ('perst-gpio' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231111164229.63803-3-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 7c38989d0f7a ("arm64: dts: qcom: sm8150: correct PCIe wake-gpios") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index eb1a9369926d2..b829a9ebc5670 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1822,7 +1822,7 @@ phys = <&pcie0_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 35 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; enable-gpio = <&tlmm 37 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; @@ -1925,7 +1925,7 @@ phys = <&pcie1_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 102 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 102 GPIO_ACTIVE_HIGH>; enable-gpio = <&tlmm 104 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; -- GitLab From 88611c1fdca4e9dc85c80a94863b69363147f9cb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 8 Jan 2024 14:12:16 +0100 Subject: [PATCH 0895/2290] arm64: dts: qcom: sm8150: correct PCIe wake-gpios [ Upstream commit 7c38989d0f7a35c83e7c4781271d42662903fa8d ] Bindings allow a "wake", not "enable", GPIO. Schematics also use WAKE name for the pin: sa8155p-adp.dtb: pcie@1c00000: Unevaluated properties are not allowed ('enable-gpio' was unexpected) Fixes: a1c86c680533 ("arm64: dts: qcom: sm8150: Add PCIe nodes") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240108131216.53867-2-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index b829a9ebc5670..9dccecd9fcaef 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1823,7 +1823,7 @@ phy-names = "pciephy"; perst-gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; - enable-gpio = <&tlmm 37 GPIO_ACTIVE_HIGH>; + wake-gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie0_default_state>; -- GitLab From 2b718bb18f5fe24ada8311ad67e89141a0c062b0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 13 Feb 2024 23:39:47 +0100 Subject: [PATCH 0896/2290] powercap: dtpm_cpu: Fix error check against freq_qos_add_request() [ Upstream commit b50155cb0d609437236c88201206267835c6f965 ] The caller of the function freq_qos_add_request() checks again a non zero value but freq_qos_add_request() can return '1' if the request already exists. Therefore, the setup function fails while the QoS request actually did not failed. Fix that by changing the check against a negative value like all the other callers of the function. Fixes: 0e8f68d7f0485 ("Add CPU energy model based support") Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/dtpm_cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 9193c3b8edebe..ae7ee611978ba 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -219,7 +219,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) ret = freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, pd->table[pd->nr_perf_states - 1].frequency); - if (ret) + if (ret < 0) goto out_dtpm_unregister; cpufreq_cpu_put(policy); -- GitLab From 4c51575705d2c2bc7d04be26079e204ee5be6f23 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Feb 2024 17:31:04 -0500 Subject: [PATCH 0897/2290] net: ena: Remove ena_select_queue [ Upstream commit 78e886ba2b549945ecada055ee0765f0ded5707a ] Avoid the following warnings by removing the ena_select_queue() function and rely on the net core to do the queue selection, The issue happen when an skb received from an interface with more queues than ena is forwarded to the ena interface. [ 1176.159959] eth0 selects TX queue 11, but real number of TX queues is 8 [ 1176.863976] eth0 selects TX queue 14, but real number of TX queues is 8 [ 1180.767877] eth0 selects TX queue 14, but real number of TX queues is 8 [ 1188.703742] eth0 selects TX queue 14, but real number of TX queues is 8 Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Kamal Heib Reviewed-by: Jacob Keller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 044b8afde69a0..9e82e7b9c3b72 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3174,22 +3174,6 @@ error_drop_packet: return NETDEV_TX_OK; } -static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - u16 qid; - /* we suspect that this is good for in--kernel network services that - * want to loop incoming skb rx to tx in normal user generated traffic, - * most probably we will not get to this - */ - if (skb_rx_queue_recorded(skb)) - qid = skb_get_rx_queue(skb); - else - qid = netdev_pick_tx(dev, skb, NULL); - - return qid; -} - static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -3359,7 +3343,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_open = ena_open, .ndo_stop = ena_close, .ndo_start_xmit = ena_start_xmit, - .ndo_select_queue = ena_select_queue, .ndo_get_stats64 = ena_get_stats64, .ndo_tx_timeout = ena_tx_timeout, .ndo_change_mtu = ena_change_mtu, -- GitLab From c20211d3df54e410f6ce097959174132594a6add Mon Sep 17 00:00:00 2001 From: Hsin-Te Yuan Date: Wed, 24 Jan 2024 07:51:57 +0000 Subject: [PATCH 0898/2290] arm64: dts: mt8195-cherry-tomato: change watchdog reset boot flow [ Upstream commit ef569d5db50e7edd709e482157769a5b3c367e22 ] The external output reset signal was originally disabled and sent from firmware. However, an unfixed bug in the firmware on tomato prevents the signal from being sent, causing the device to fail to boot. To fix this, enable external output reset signal to allow the device to reboot normally. Fixes: 5eb2e303ec6b ("arm64: dts: mediatek: Introduce MT8195 Cherry platform's Tomato") Signed-off-by: Hsin-Te Yuan Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240124-send-upstream-v3-1-5097c9862a73@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts | 4 ++++ arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts | 4 ++++ arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts index 3348ba69ff6cf..d86d193e5a75e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r1.dts @@ -13,3 +13,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts index 4669e9d917f8c..5356f53308e24 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r2.dts @@ -33,3 +33,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts index 5021edd02f7c1..fca3606cb951e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry-tomato-r3.dts @@ -34,3 +34,7 @@ &ts_10 { status = "okay"; }; + +&watchdog { + /delete-property/ mediatek,disable-extrst; +}; -- GitLab From 0d276d9f335f41d6524258d58c0c0241ef9a83a4 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 26 Jan 2024 12:23:25 +0000 Subject: [PATCH 0899/2290] firmware: arm_scmi: Fix double free in SMC transport cleanup path [ Upstream commit f1d71576d2c9ec8fdb822173fa7f3de79475e9bd ] When the generic SCMI code tears down a channel, it calls the chan_free callback function, defined by each transport. Since multiple protocols might share the same transport_info member, chan_free() might want to clean up the same member multiple times within the given SCMI transport implementation. In this case, it is SMC transport. This will lead to a NULL pointer dereference at the second time: | scmi_protocol scmi_dev.1: Enabled polling mode TX channel - prot_id:16 | arm-scmi firmware:scmi: SCMI Notifications - Core Enabled. | arm-scmi firmware:scmi: unable to communicate with SCMI | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 | Mem abort info: | ESR = 0x0000000096000004 | EC = 0x25: DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | FSC = 0x04: level 0 translation fault | Data abort info: | ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 | CM = 0, WnR = 0, TnD = 0, TagAccess = 0 | GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 | user pgtable: 4k pages, 48-bit VAs, pgdp=0000000881ef8000 | [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 | Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP | Modules linked in: | CPU: 4 PID: 1 Comm: swapper/0 Not tainted 6.7.0-rc2-00124-g455ef3d016c9-dirty #793 | Hardware name: FVP Base RevC (DT) | pstate: 61400009 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) | pc : smc_chan_free+0x3c/0x6c | lr : smc_chan_free+0x3c/0x6c | Call trace: | smc_chan_free+0x3c/0x6c | idr_for_each+0x68/0xf8 | scmi_cleanup_channels.isra.0+0x2c/0x58 | scmi_probe+0x434/0x734 | platform_probe+0x68/0xd8 | really_probe+0x110/0x27c | __driver_probe_device+0x78/0x12c | driver_probe_device+0x3c/0x118 | __driver_attach+0x74/0x128 | bus_for_each_dev+0x78/0xe0 | driver_attach+0x24/0x30 | bus_add_driver+0xe4/0x1e8 | driver_register+0x60/0x128 | __platform_driver_register+0x28/0x34 | scmi_driver_init+0x84/0xc0 | do_one_initcall+0x78/0x33c | kernel_init_freeable+0x2b8/0x51c | kernel_init+0x24/0x130 | ret_from_fork+0x10/0x20 | Code: f0004701 910a0021 aa1403e5 97b91c70 (b9400280) | ---[ end trace 0000000000000000 ]--- Simply check for the struct pointer being NULL before trying to access its members, to avoid this situation. This was found when a transport doesn't really work (for instance no SMC service), the probe routines then tries to clean up, and triggers a crash. Signed-off-by: Andre Przywara Fixes: 1dc6558062da ("firmware: arm_scmi: Add smc/hvc transport") Reviewed-by: Cristian Marussi Link: https://lore.kernel.org/r/20240126122325.2039669-1-andre.przywara@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/smc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c index ac0bd51ef16a2..42ea308a2c1d5 100644 --- a/drivers/firmware/arm_scmi/smc.c +++ b/drivers/firmware/arm_scmi/smc.c @@ -171,6 +171,13 @@ static int smc_chan_free(int id, void *p, void *data) struct scmi_chan_info *cinfo = p; struct scmi_smc *scmi_info = cinfo->transport_info; + /* + * Different protocols might share the same chan info, so a previous + * smc_chan_free call might have already freed the structure. + */ + if (!scmi_info) + return 0; + /* Ignore any possible further reception on the IRQ path */ if (scmi_info->irq > 0) free_irq(scmi_info->irq, scmi_info); -- GitLab From 3936e0f81ac4c042e461a35ea8658bf5dbd8b9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Sat, 17 Feb 2024 14:22:41 +0100 Subject: [PATCH 0900/2290] wifi: wilc1000: revert reset line logic flip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f3ec643947634bed41b97bd56b248f7c78498eab ] This reverts commit fcf690b0b47494df51d214db5c5a714a400b0257. When using a wilc1000 chip over a spi bus, users can optionally define a reset gpio and a chip enable gpio. The reset line of wilc1000 is active low, so to hold the chip in reset, a low (physical) value must be applied. The corresponding device tree binding documentation was introduced by commit f31ee3c0a555 ("wilc1000: Document enable-gpios and reset-gpios properties") and correctly indicates that the reset line is an active-low signal. The corresponding driver part, brought by commit ec031ac4792c ("wilc1000: Add reset/enable GPIO support to SPI driver") was applying the correct logic. But commit fcf690b0b474 ("wifi: wilc1000: use correct sequence of RESET for chip Power-UP/Down") eventually flipped this logic and started misusing the gpiod APIs, applying an inverted logic when powering up/down the chip (for example, setting the reset line to a logic "1" during power up, which in fact asserts the reset line when device tree describes the reset line as GPIO_ACTIVE_LOW). As a consequence, any platform currently using the driver in SPI mode must use a faulty reset line description in device tree, or else chip will be maintained in reset and will not even allow to bring up the chip. Fix reset line usage by inverting back the gpiod APIs usage, setting the reset line to the logic value "0" when powering the chip, and the logic value "1" when powering off the chip. Fixes: fcf690b0b474 ("wifi: wilc1000: use correct sequence of RESET for chip Power-UP/Down") Signed-off-by: Alexis Lothoré Acked-by: Conor Dooley Acked-by: Ajay Singh Signed-off-by: Kalle Valo Link: https://msgid.link/20240217-wilc_1000_reset_line-v2-1-b216f433d7d5@bootlin.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/spi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index b0fc5e68feeca..5877e2c1fa0fc 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -191,11 +191,11 @@ static void wilc_wlan_power(struct wilc *wilc, bool on) /* assert ENABLE: */ gpiod_set_value(gpios->enable, 1); mdelay(5); - /* assert RESET: */ - gpiod_set_value(gpios->reset, 1); - } else { /* deassert RESET: */ gpiod_set_value(gpios->reset, 0); + } else { + /* assert RESET: */ + gpiod_set_value(gpios->reset, 1); /* deassert ENABLE: */ gpiod_set_value(gpios->enable, 0); } -- GitLab From 5425ac2428b34b6ca483e81aeee7ae93832e027b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Aug 2023 17:03:04 +0200 Subject: [PATCH 0901/2290] ARM: dts: arm: realview: Fix development chip ROM compatible value [ Upstream commit 3baa4c5143d65ebab2de0d99a395e5f4f1f46608 ] When the development chip ROM was added, the "direct-mapped" compatible value was already obsolete. In addition, the device node lacked the accompanying "probe-type" property, causing the old physmap_of_core driver to fall back to trying all available probe types. Unfortunately this fallback was lost when the DT and pdata cases were merged. Fix this by using the modern "mtd-rom" compatible value instead. Fixes: 5c3f5edbe0a1dff3 ("ARM: realview: add flash devices to the PB1176 DTS") Fixes: 642b1e8dbed7bbbf ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/arm-realview-pb1176.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index efed325af88d2..d99bac02232b3 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -451,7 +451,7 @@ /* Direct-mapped development chip ROM */ pb1176_rom@10200000 { - compatible = "direct-mapped"; + compatible = "mtd-rom"; reg = <0x10200000 0x4000>; bank-width = <1>; }; -- GitLab From 96132cc2e159ccaa0619f11975016dc7d7694572 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 25 Oct 2022 23:06:29 +0100 Subject: [PATCH 0902/2290] arm64: dts: renesas: r9a07g043: Split out RZ/G2UL SoC specific parts [ Upstream commit b9a0be2054964026aa58966ce9724b672f210835 ] Move RZ/G2UL SoC specific parts to r9a07g043u.dtsi so that r9a07g043.dtsi can be shared with RZ/Five (RISC-V SoC). Below are the changes due to which SoC specific parts are moved to r9a07g043u.dtsi: - RZ/G2UL has Cortex-A55 (ARM64) whereas RZ/Five has AX45MP (RISC-V), - RZ/G2UL has GICv3 as interrupt controller whereas RZ/Five has PLIC, - RZ/G2UL has interrupts for SYSC block whereas interrupts are missing for SYSC block on RZ/Five, - RZ/G2UL has armv8-timer whereas RZ/Five has riscv-timer, - RZ/G2UL has PSCI whereas RZ/Five have OpenSBI. Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20221025220629.79321-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: 14fe225dd5fc ("arm64: dts: renesas: rzg2l: Add missing interrupts to IRQC nodes") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043.dtsi | 54 +------------------ arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 60 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 53 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043.dtsi index a4738842f0646..7f88395ff7997 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* - * Device Tree Source for the RZ/G2UL SoC + * Device Tree Source for the RZ/Five and RZ/G2UL SoCs * * Copyright (C) 2022 Renesas Electronics Corp. */ @@ -68,36 +68,8 @@ }; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu0: cpu@0 { - compatible = "arm,cortex-a55"; - reg = <0>; - device_type = "cpu"; - #cooling-cells = <2>; - next-level-cache = <&L3_CA55>; - enable-method = "psci"; - clocks = <&cpg CPG_CORE R9A07G043_CLK_I>; - operating-points-v2 = <&cluster0_opp>; - }; - - L3_CA55: cache-controller-0 { - compatible = "cache"; - cache-unified; - cache-size = <0x40000>; - }; - }; - - psci { - compatible = "arm,psci-1.0", "arm,psci-0.2"; - method = "smc"; - }; - soc: soc { compatible = "simple-bus"; - interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; ranges; @@ -545,12 +517,6 @@ sysc: system-controller@11020000 { compatible = "renesas,r9a07g043-sysc"; reg = <0 0x11020000 0 0x10000>; - interrupts = , - , - , - ; - interrupt-names = "lpm_int", "ca55stbydone_int", - "cm33stbyr_int", "ca55_deny"; status = "disabled"; }; @@ -603,16 +569,6 @@ dma-channels = <16>; }; - gic: interrupt-controller@11900000 { - compatible = "arm,gic-v3"; - #interrupt-cells = <3>; - #address-cells = <0>; - interrupt-controller; - reg = <0x0 0x11900000 0 0x40000>, - <0x0 0x11940000 0 0x60000>; - interrupts = ; - }; - sdhi0: mmc@11c00000 { compatible = "renesas,sdhi-r9a07g043", "renesas,rcar-gen3-sdhi"; @@ -893,12 +849,4 @@ }; }; }; - - timer { - compatible = "arm,armv8-timer"; - interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, - <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>; - }; }; diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index 96f935bc2d4d1..b8bf06b512351 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -10,3 +10,63 @@ #define SOC_PERIPHERAL_IRQ(nr) GIC_SPI nr #include "r9a07g043.dtsi" + +/ { + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "arm,cortex-a55"; + reg = <0>; + device_type = "cpu"; + #cooling-cells = <2>; + next-level-cache = <&L3_CA55>; + enable-method = "psci"; + clocks = <&cpg CPG_CORE R9A07G043_CLK_I>; + operating-points-v2 = <&cluster0_opp>; + }; + + L3_CA55: cache-controller-0 { + compatible = "cache"; + cache-unified; + cache-size = <0x40000>; + }; + }; + + psci { + compatible = "arm,psci-1.0", "arm,psci-0.2"; + method = "smc"; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts-extended = <&gic GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>, + <&gic GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(1) | IRQ_TYPE_LEVEL_LOW)>; + }; +}; + +&soc { + interrupt-parent = <&gic>; + + gic: interrupt-controller@11900000 { + compatible = "arm,gic-v3"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x11900000 0 0x40000>, + <0x0 0x11940000 0 0x60000>; + interrupts = ; + }; +}; + +&sysc { + interrupts = , + , + , + ; + interrupt-names = "lpm_int", "ca55stbydone_int", + "cm33stbyr_int", "ca55_deny"; +}; -- GitLab From 7f1d9f8bde624d34c6af527e043b7e57630a43d4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 2 Jan 2023 22:18:13 +0000 Subject: [PATCH 0903/2290] arm64: dts: renesas: r9a07g043u: Add IRQC node [ Upstream commit 48ab6eddd8bbcf7e9c8ae27bf42d0b52a777aaba ] Add IRQC node to R9A07G043 (RZ/G2UL) SoC DTSI. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230102221815.273719-5-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: 14fe225dd5fc ("arm64: dts: renesas: rzg2l: Add missing interrupts to IRQC nodes") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 68 +++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index b8bf06b512351..a6e777aee02ee 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -51,6 +51,74 @@ &soc { interrupt-parent = <&gic>; + irqc: interrupt-controller@110a0000 { + compatible = "renesas,r9a07g043u-irqc", + "renesas,rzg2l-irqc"; + reg = <0 0x110a0000 0 0x10000>; + #interrupt-cells = <2>; + #address-cells = <0>; + interrupt-controller; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", + "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err"; + clocks = <&cpg CPG_MOD R9A07G043_IA55_CLK>, + <&cpg CPG_MOD R9A07G043_IA55_PCLK>; + clock-names = "clk", "pclk"; + power-domains = <&cpg>; + resets = <&cpg R9A07G043_IA55_RESETN>; + }; + gic: interrupt-controller@11900000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; -- GitLab From 76cfe86f2d20ce5e4b7d746ee1d140d10c3276e2 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 5 Feb 2024 14:44:20 +0000 Subject: [PATCH 0904/2290] arm64: dts: renesas: rzg2l: Add missing interrupts to IRQC nodes [ Upstream commit 14fe225dd5fcd5928583b0bcc34398a581f51602 ] The IRQC IP block supports Bus error and ECCRAM interrupts on RZ/G2L and alike SoC's (listed below). Update the IRQC nodes with the missing interrupts, and additionally, include the 'interrupt-names' properties in the IRQC nodes so that the driver can parse interrupts by name. - R9A07G043U - RZ/G2UL - R9A07G044L/R9A07G044LC - RZ/{G2L,G2LC} - R9A07G054 - RZ/V2L Fixes: 5edc51af5b30 ("arm64: dts: renesas: r9a07g044: Add IRQC node") Fixes: 48ab6eddd8bb ("arm64: dts: renesas: r9a07g043u: Add IRQC node") Fixes: 379478ab09e0 ("arm64: dts: renesas: r9a07g054: Add IRQC node") Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240205144421.51195-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r9a07g043u.dtsi | 12 +++++++++-- arch/arm64/boot/dts/renesas/r9a07g044.dtsi | 22 ++++++++++++++++++++- arch/arm64/boot/dts/renesas/r9a07g054.dtsi | 22 ++++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi index a6e777aee02ee..011d4c88f4ed9 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g043u.dtsi @@ -99,7 +99,13 @@ , , , - ; + , + , + , + , + , + , + ; interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", "irq4", "irq5", "irq6", "irq7", @@ -111,7 +117,9 @@ "tint20", "tint21", "tint22", "tint23", "tint24", "tint25", "tint26", "tint27", "tint28", "tint29", "tint30", "tint31", - "bus-err"; + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G043_IA55_CLK>, <&cpg CPG_MOD R9A07G043_IA55_PCLK>; clock-names = "clk", "pclk"; diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi index 7dbf6a6292f49..d26488b5a82df 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi @@ -698,7 +698,27 @@ , , , - ; + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, <&cpg CPG_MOD R9A07G044_IA55_PCLK>; clock-names = "clk", "pclk"; diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi index e000510b90a42..b3d37ca942ee3 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi @@ -704,7 +704,27 @@ , , , - ; + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; clocks = <&cpg CPG_MOD R9A07G054_IA55_CLK>, <&cpg CPG_MOD R9A07G054_IA55_PCLK>; clock-names = "clk", "pclk"; -- GitLab From efab55e16c55c637f74ff58dcfddd18e0b3b56b6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Oct 2022 12:03:52 +0200 Subject: [PATCH 0905/2290] arm64: dts: renesas: r8a779a0: Update to R-Car Gen4 compatible values [ Upstream commit a1ca409cc050166a9e8ed183c1d4192f511cf6a2 ] Despite the name, R-Car V3U is the first member of the R-Car Gen4 family. Hence update the compatible properties in various device nodes to include family-specific compatible values for R-Car Gen4 instead of R-Car Gen3: - EtherAVB, - MSIOF. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/387168aef20d399d4f4318f4ecab9c3b016fd6f2.1666605756.git.geert+renesas@glider.be Stable-dep-of: 0c51912331f8 ("arm64: dts: renesas: r8a779a0: Correct avb[01] reg sizes") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a779a0.dtsi | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi index ed9400f903c9e..41fbb9998cf82 100644 --- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi @@ -656,7 +656,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6800000 0 0x800>; interrupts = , , @@ -704,7 +704,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6810000 0 0x800>; interrupts = , , @@ -752,7 +752,7 @@ avb2: ethernet@e6820000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6820000 0 0x1000>; interrupts = , , @@ -800,7 +800,7 @@ avb3: ethernet@e6830000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6830000 0 0x1000>; interrupts = , , @@ -848,7 +848,7 @@ avb4: ethernet@e6840000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6840000 0 0x1000>; interrupts = , , @@ -896,7 +896,7 @@ avb5: ethernet@e6850000 { compatible = "renesas,etheravb-r8a779a0", - "renesas,etheravb-rcar-gen3"; + "renesas,etheravb-rcar-gen4"; reg = <0 0xe6850000 0 0x1000>; interrupts = , , @@ -1019,7 +1019,7 @@ msiof0: spi@e6e90000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6e90000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 618>; @@ -1034,7 +1034,7 @@ msiof1: spi@e6ea0000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6ea0000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 619>; @@ -1049,7 +1049,7 @@ msiof2: spi@e6c00000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c00000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 620>; @@ -1064,7 +1064,7 @@ msiof3: spi@e6c10000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c10000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 621>; @@ -1079,7 +1079,7 @@ msiof4: spi@e6c20000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c20000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 622>; @@ -1094,7 +1094,7 @@ msiof5: spi@e6c28000 { compatible = "renesas,msiof-r8a779a0", - "renesas,rcar-gen3-msiof"; + "renesas,rcar-gen4-msiof"; reg = <0 0xe6c28000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 623>; -- GitLab From bea023d846d352e1fbfdd95337cd8bc2b75323fe Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 11 Feb 2024 15:21:30 +0100 Subject: [PATCH 0906/2290] arm64: dts: renesas: r8a779a0: Correct avb[01] reg sizes [ Upstream commit 0c51912331f8ba5ce5fb52f46e340945160672a3 ] All Ethernet AVB instances on R-Car V3U have registers related to UDP/IP support, but the declared register blocks for the first two instances are too small to cover them. Fix this by extending the register block sizes. Fixes: 5a633320f08b8c9b ("arm64: dts: renesas: r8a779a0: Add Ethernet-AVB support") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/ce6ce3c4b1495e02e7c1803fca810a7178a84500.1707660323.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a779a0.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi index 41fbb9998cf82..b677ef6705d94 100644 --- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi @@ -657,7 +657,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779a0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6800000 0 0x800>; + reg = <0 0xe6800000 0 0x1000>; interrupts = , , , @@ -705,7 +705,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779a0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6810000 0 0x800>; + reg = <0 0xe6810000 0 0x1000>; interrupts = , , , -- GitLab From 0820c84be6361af434ddba881d9b1a029165e5f8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 11 Feb 2024 15:21:31 +0100 Subject: [PATCH 0907/2290] arm64: dts: renesas: r8a779g0: Correct avb[01] reg sizes [ Upstream commit 7edbb5880dc3317a5eaec2166de71ff394598e6b ] All Ethernet AVB instances on R-Car V4H have registers related to UDP/IP support, but the declared register blocks for the first two instances are too small to cover them. Fix this by extending the register block sizes. Fixes: 848c82db56923a8b ("arm64: dts: renesas: r8a779g0: Add RAVB nodes") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/83437778614a7c96f4d8f1be98dffeee29bb4a0b.1707660323.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a779g0.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi index d58b18802cb01..868d1a3cbdf61 100644 --- a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi @@ -337,7 +337,7 @@ avb0: ethernet@e6800000 { compatible = "renesas,etheravb-r8a779g0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6800000 0 0x800>; + reg = <0 0xe6800000 0 0x1000>; interrupts = , , , @@ -384,7 +384,7 @@ avb1: ethernet@e6810000 { compatible = "renesas,etheravb-r8a779g0", "renesas,etheravb-rcar-gen4"; - reg = <0 0xe6810000 0 0x800>; + reg = <0 0xe6810000 0 0x1000>; interrupts = , , , -- GitLab From e16c33dd9967b7f20987bf653acc4f605836127b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:54 +0800 Subject: [PATCH 0908/2290] net: mctp: copy skb ext data when fragmenting [ Upstream commit 1394c1dec1c619a46867ed32791a29695372bff8 ] If we're fragmenting on local output, the original packet may contain ext data for the MCTP flows. We'll want this in the resulting fragment skbs too. So, do a skb_ext_copy() in the fragmentation path, and implement the MCTP-specific parts of an ext copy operation. Fixes: 67737c457281 ("mctp: Pass flow data & flow release events to drivers") Reported-by: Jian Zhang Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/skbuff.c | 8 ++++++++ net/mctp/route.c | 3 +++ 2 files changed, 11 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d4bd10f8723df..e38a4c7449f62 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6500,6 +6500,14 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } +#endif +#ifdef CONFIG_MCTP_FLOWS + if (old_active & (1 << SKB_EXT_MCTP)) { + struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP); + + if (flow->key) + refcount_inc(&flow->key->refs); + } #endif __skb_ext_put(old); return new; diff --git a/net/mctp/route.c b/net/mctp/route.c index 0144d8ebdaefb..05ab4fddc82e9 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -843,6 +843,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* copy message payload */ skb_copy_bits(skb, pos, skb_transport_header(skb2), size); + /* we need to copy the extensions, for MCTP flow data */ + skb_ext_copy(skb2, skb); + /* do route */ rc = rt->output(rt, skb2); if (rc) -- GitLab From be52ee92ced6408addbdbb9aa930ee859acc1428 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 5 Dec 2023 10:26:15 -0800 Subject: [PATCH 0909/2290] pstore: inode: Convert mutex usage to guard(mutex) [ Upstream commit e2eeddefb046dbc771a6fa426f7f98fb25adfe68 ] Replace open-coded mutex handling with cleanup.h guard(mutex) and scoped_guard(mutex, ...). Cc: Guilherme G. Piccoli Cc: Tony Luck Cc: Link: https://lore.kernel.org/r/20231205182622.1329923-2-keescook@chromium.org Signed-off-by: Kees Cook Stable-dep-of: a43e0fc5e913 ("pstore: inode: Only d_invalidate() is needed") Signed-off-by: Sasha Levin --- fs/pstore/inode.c | 76 +++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index ffbadb8b3032d..05ddfb37b15e1 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -182,25 +182,21 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; struct pstore_record *record = p->record; - int rc = 0; if (!record->psi->erase) return -EPERM; /* Make sure we can't race while removing this file. */ - mutex_lock(&records_list_lock); - if (!list_empty(&p->list)) - list_del_init(&p->list); - else - rc = -ENOENT; - p->dentry = NULL; - mutex_unlock(&records_list_lock); - if (rc) - return rc; - - mutex_lock(&record->psi->read_mutex); - record->psi->erase(record); - mutex_unlock(&record->psi->read_mutex); + scoped_guard(mutex, &records_list_lock) { + if (!list_empty(&p->list)) + list_del_init(&p->list); + else + return -ENOENT; + p->dentry = NULL; + } + + scoped_guard(mutex, &record->psi->read_mutex) + record->psi->erase(record); return simple_unlink(dir, dentry); } @@ -292,19 +288,16 @@ static struct dentry *psinfo_lock_root(void) { struct dentry *root; - mutex_lock(&pstore_sb_lock); + guard(mutex)(&pstore_sb_lock); /* * Having no backend is fine -- no records appear. * Not being mounted is fine -- nothing to do. */ - if (!psinfo || !pstore_sb) { - mutex_unlock(&pstore_sb_lock); + if (!psinfo || !pstore_sb) return NULL; - } root = pstore_sb->s_root; inode_lock(d_inode(root)); - mutex_unlock(&pstore_sb_lock); return root; } @@ -319,19 +312,19 @@ int pstore_put_backend_records(struct pstore_info *psi) if (!root) return 0; - mutex_lock(&records_list_lock); - list_for_each_entry_safe(pos, tmp, &records_list, list) { - if (pos->record->psi == psi) { - list_del_init(&pos->list); - rc = simple_unlink(d_inode(root), pos->dentry); - if (WARN_ON(rc)) - break; - d_drop(pos->dentry); - dput(pos->dentry); - pos->dentry = NULL; + scoped_guard(mutex, &records_list_lock) { + list_for_each_entry_safe(pos, tmp, &records_list, list) { + if (pos->record->psi == psi) { + list_del_init(&pos->list); + rc = simple_unlink(d_inode(root), pos->dentry); + if (WARN_ON(rc)) + break; + d_drop(pos->dentry); + dput(pos->dentry); + pos->dentry = NULL; + } } } - mutex_unlock(&records_list_lock); inode_unlock(d_inode(root)); @@ -355,20 +348,20 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) if (WARN_ON(!inode_is_locked(d_inode(root)))) return -EINVAL; - rc = -EEXIST; + guard(mutex)(&records_list_lock); + /* Skip records that are already present in the filesystem. */ - mutex_lock(&records_list_lock); list_for_each_entry(pos, &records_list, list) { if (pos->record->type == record->type && pos->record->id == record->id && pos->record->psi == record->psi) - goto fail; + return -EEXIST; } rc = -ENOMEM; inode = pstore_get_inode(root->d_sb); if (!inode) - goto fail; + return -ENOMEM; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; scnprintf(name, sizeof(name), "%s-%s-%llu%s", @@ -395,7 +388,6 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) d_add(dentry, inode); list_add(&private->list, &records_list); - mutex_unlock(&records_list_lock); return 0; @@ -403,8 +395,6 @@ fail_private: free_pstore_private(private); fail_inode: iput(inode); -fail: - mutex_unlock(&records_list_lock); return rc; } @@ -450,9 +440,8 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) return -ENOMEM; - mutex_lock(&pstore_sb_lock); - pstore_sb = sb; - mutex_unlock(&pstore_sb_lock); + scoped_guard(mutex, &pstore_sb_lock) + pstore_sb = sb; pstore_get_records(0); @@ -467,17 +456,14 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type, static void pstore_kill_sb(struct super_block *sb) { - mutex_lock(&pstore_sb_lock); + guard(mutex)(&pstore_sb_lock); WARN_ON(pstore_sb && pstore_sb != sb); kill_litter_super(sb); pstore_sb = NULL; - mutex_lock(&records_list_lock); + guard(mutex)(&records_list_lock); INIT_LIST_HEAD(&records_list); - mutex_unlock(&records_list_lock); - - mutex_unlock(&pstore_sb_lock); } static struct file_system_type pstore_fs_type = { -- GitLab From db6e5e16f1ee9e3b01d2f71c7f0ba945f4bf0f4e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2024 09:48:46 -0800 Subject: [PATCH 0910/2290] pstore: inode: Only d_invalidate() is needed [ Upstream commit a43e0fc5e9134a46515de2f2f8d4100b74e50de3 ] Unloading a modular pstore backend with records in pstorefs would trigger the dput() double-drop warning: WARNING: CPU: 0 PID: 2569 at fs/dcache.c:762 dput.part.0+0x3f3/0x410 Using the combo of d_drop()/dput() (as mentioned in Documentation/filesystems/vfs.rst) isn't the right approach here, and leads to the reference counting problem seen above. Use d_invalidate() and update the code to not bother checking for error codes that can never happen. Suggested-by: Alexander Viro Fixes: 609e28bb139e ("pstore: Remove filesystem records when backend is unregistered") Signed-off-by: Kees Cook --- fs/pstore/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 05ddfb37b15e1..ea3f104371d62 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -306,7 +306,6 @@ int pstore_put_backend_records(struct pstore_info *psi) { struct pstore_private *pos, *tmp; struct dentry *root; - int rc = 0; root = psinfo_lock_root(); if (!root) @@ -316,11 +315,8 @@ int pstore_put_backend_records(struct pstore_info *psi) list_for_each_entry_safe(pos, tmp, &records_list, list) { if (pos->record->psi == psi) { list_del_init(&pos->list); - rc = simple_unlink(d_inode(root), pos->dentry); - if (WARN_ON(rc)) - break; - d_drop(pos->dentry); - dput(pos->dentry); + d_invalidate(pos->dentry); + simple_unlink(d_inode(root), pos->dentry); pos->dentry = NULL; } } @@ -328,7 +324,7 @@ int pstore_put_backend_records(struct pstore_info *psi) inode_unlock(d_inode(root)); - return rc; + return 0; } /* -- GitLab From 4478f7e5be243b49e6c7f4dd67cbdc9b8b8436f6 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:45 +0800 Subject: [PATCH 0911/2290] arm64: dts: allwinner: h6: Add RX DMA channel for SPDIF [ Upstream commit 7b59348c11f3355e284d77bbe3d33632ddadcfc2 ] The SPDIF hardware found on the H6 supports both transmit and receive functions. However it is missing the RX DMA channel. Add the SPDIF hardware block's RX DMA channel. Also remove the by-default pinmux, since the end device can choose to implement either or both functionalities. Fixes: f95b598df419 ("arm64: dts: allwinner: Add SPDIF node for Allwinner H6") Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20240127163247.384439-6-wens@kernel.org Signed-off-by: Jernej Skrabec Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 2 ++ arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi | 2 ++ arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi | 7 +++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 9ec49ac2f6fd5..381d58cea092d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -291,6 +291,8 @@ }; &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&spdif_tx_pin>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi index 4903d6358112d..855b7d43bc503 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-tanix.dtsi @@ -166,6 +166,8 @@ }; &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&spdif_tx_pin>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index ca1d287a0a01d..d11e5041bae9a 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -406,6 +406,7 @@ function = "spi1"; }; + /omit-if-no-ref/ spdif_tx_pin: spdif-tx-pin { pins = "PH7"; function = "spdif"; @@ -655,10 +656,8 @@ clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>; clock-names = "apb", "spdif"; resets = <&ccu RST_BUS_SPDIF>; - dmas = <&dma 2>; - dma-names = "tx"; - pinctrl-names = "default"; - pinctrl-0 = <&spdif_tx_pin>; + dmas = <&dma 2>, <&dma 2>; + dma-names = "rx", "tx"; status = "disabled"; }; -- GitLab From 23bb0006c9dbf737153956c2069088c3bb2563df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Tue, 14 Mar 2023 19:06:04 +0100 Subject: [PATCH 0912/2290] ARM: dts: imx6dl-yapp4: Move phy reset into switch node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7da7b84fee58c85a6075022023d31edea40e81a1 ] Drop the phy-reset-duration and phy-reset-gpios deprecated properties and move reset-gpios under the switch node. Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo Stable-dep-of: 023bd910d3ab ("ARM: dts: imx6dl-yapp4: Fix typo in the QCA switch register address") Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index aacbf317feea6..cb1972f8e8d27 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -106,8 +106,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; - phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; - phy-reset-duration = <20>; phy-supply = <&sw2_reg>; status = "okay"; @@ -131,6 +129,7 @@ switch@10 { compatible = "qca,qca8334"; reg = <10>; + reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; switch_ports: ports { #address-cells = <1>; -- GitLab From 28b43ec7e4c00645efec8f7d239d649dfd6f71e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 14 Feb 2024 10:03:27 +0100 Subject: [PATCH 0913/2290] ARM: dts: imx6dl-yapp4: Fix typo in the QCA switch register address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 023bd910d3ab735459f84b22bb99fb9e00bd9d76 ] This change does not have any functional effect. The switch works just fine without this patch as it has full access to all the addresses on the bus. This is simply a clean-up to set the node name address and reg address to the same value. Fixes: 15b43e497ffd ("ARM: dts: imx6dl-yapp4: Use correct pseudo PHY address for the switch") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index cb1972f8e8d27..a655b945bf2df 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -128,7 +128,7 @@ switch@10 { compatible = "qca,qca8334"; - reg = <10>; + reg = <0x10>; reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; switch_ports: ports { -- GitLab From 681ba22555dc538a5073dc1491db8fd016a2eb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Wed, 14 Feb 2024 10:03:28 +0100 Subject: [PATCH 0914/2290] ARM: dts: imx6dl-yapp4: Move the internal switch PHYs under the switch node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79978bff2e4b8e05ebdf5fc3ee6b794002393484 ] We identified that the PHYs actually do not work since commit 7da7b84fee58 ("ARM: dts: imx6dl-yapp4: Move phy reset into switch node") as a coincidence of several circumstances. The reset signal is kept asserted by a pull-down resistor on the board unless it is deasserted by GPIO from the SoC. This is to keep the switch dead until it is configured properly by the kernel and user space. Prior to the referenced commit the switch was reset by the FEC driver and the reset GPIO was actively deasserted. The mdio-bus was scanned and the attached switch and its PHYs were found and configured. With the referenced commit the switch is reset by the qca8k driver. Because of another bug in the qca8k driver, functionality of the reset pin depends on its pre-kernel configuration. See commit c44fc98f0a8f ("net: dsa: qca8k: fix illegal usage of GPIO") The problem did not appear until we removed support for the switch and configuration of its reset pin from the bootloader. To fix that, properly describe the internal mdio-bus configuration of the qca8334 switch. The PHYs are internal to the switch and sit on its internal mdio-bus. Fixes: 7da7b84fee58 ("ARM: dts: imx6dl-yapp4: Move phy reset into switch node") Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 23 ++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index a655b945bf2df..4b7aee8958923 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -118,14 +118,6 @@ #address-cells = <1>; #size-cells = <0>; - phy_port2: phy@1 { - reg = <1>; - }; - - phy_port3: phy@2 { - reg = <2>; - }; - switch@10 { compatible = "qca,qca8334"; reg = <0x10>; @@ -150,15 +142,30 @@ eth2: port@2 { reg = <2>; label = "eth2"; + phy-mode = "internal"; phy-handle = <&phy_port2>; }; eth1: port@3 { reg = <3>; label = "eth1"; + phy-mode = "internal"; phy-handle = <&phy_port3>; }; }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy_port2: ethernet-phy@1 { + reg = <1>; + }; + + phy_port3: ethernet-phy@2 { + reg = <2>; + }; + }; }; }; }; -- GitLab From 24cc77b670ad74b67ff814d84dc87ce9fd2f5551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 23 Jan 2024 13:22:58 +0100 Subject: [PATCH 0915/2290] arm64: dts: marvell: reorder crypto interrupts on Armada SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ec55a22149d64f9ac41845d923b884d4a666bf4d ] Match order specified in binding documentation. It says "mem" should be the last interrupt. This fixes: arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:0: 'ring0' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:1: 'ring1' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:2: 'ring2' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:3: 'ring3' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:4: 'eip' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# arch/arm64/boot/dts/marvell/armada-3720-db.dtb: crypto@90000: interrupt-names:5: 'mem' was expected from schema $id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml# Signed-off-by: Rafał Miłecki Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 10 +++++----- arch/arm64/boot/dts/marvell/armada-cp11x.dtsi | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index df152c72276b8..cd28e1c45b70a 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -426,14 +426,14 @@ crypto: crypto@90000 { compatible = "inside-secure,safexcel-eip97ies"; reg = <0x90000 0x20000>; - interrupts = , - , + interrupts = , , , , - ; - interrupt-names = "mem", "ring0", "ring1", - "ring2", "ring3", "eip"; + , + ; + interrupt-names = "ring0", "ring1", "ring2", + "ring3", "eip", "mem"; clocks = <&nb_periph_clk 15>; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi index d6c0990a267d9..218c059b16d9c 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi @@ -506,14 +506,14 @@ CP11X_LABEL(crypto): crypto@800000 { compatible = "inside-secure,safexcel-eip197b"; reg = <0x800000 0x200000>; - interrupts = <87 IRQ_TYPE_LEVEL_HIGH>, - <88 IRQ_TYPE_LEVEL_HIGH>, + interrupts = <88 IRQ_TYPE_LEVEL_HIGH>, <89 IRQ_TYPE_LEVEL_HIGH>, <90 IRQ_TYPE_LEVEL_HIGH>, <91 IRQ_TYPE_LEVEL_HIGH>, - <92 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "mem", "ring0", "ring1", - "ring2", "ring3", "eip"; + <92 IRQ_TYPE_LEVEL_HIGH>, + <87 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "ring0", "ring1", "ring2", "ring3", + "eip", "mem"; clock-names = "core", "reg"; clocks = <&CP11X_LABEL(clk) 1 26>, <&CP11X_LABEL(clk) 1 17>; -- GitLab From 804db3c1ad5c53ffac8f66fa8a94a13c77f18537 Mon Sep 17 00:00:00 2001 From: David McFarland Date: Wed, 3 Jan 2024 12:55:18 -0400 Subject: [PATCH 0916/2290] ACPI: resource: Add Infinity laptops to irq1_edge_low_force_override [ Upstream commit e2605d4039a42a03000856b3229932455717b48b ] A user reported a keyboard problem similar to ones reported with other Zen laptops, on an Infinity E15-5A165-BM. Add board name matches for this model and one (untested) close relative to irq1_edge_low_force_override. Link: https://lemmy.ml/post/9864736 Link: https://www.infinitygaming.com.au/bios/ Link: https://lore.kernel.org/linux-acpi/20231006123304.32686-1-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki Stable-dep-of: 021a67d09615 ("ACPI: resource: Add MAIBENBEN X577 to irq1_edge_low_force_override") Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 5ebeb0d7b6be0..35a98a5916f63 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -543,6 +543,18 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "17U70P"), }, }, + { + /* Infinity E15-5A165-BM */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM5RG1E0009COM"), + }, + }, + { + /* Infinity E15-5A305-1M */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM5RGEE0016COM"), + }, + }, { } }; -- GitLab From c259c196ebd893cda95c435c2e9651924f9d48a5 Mon Sep 17 00:00:00 2001 From: "Alexey I. Froloff" Date: Fri, 16 Feb 2024 12:30:09 +0000 Subject: [PATCH 0917/2290] ACPI: resource: Do IRQ override on Lunnen Ground laptops [ Upstream commit e23ad54fef186aa66007895be1382c88f1ee2bf7 ] The Lunnen Ground 15 and 16 needs IRQ overriding for the keyboard to work. Adding an entries for these laptops to the override_table makes the internal keyboard functional. Signed-off-by: Alexey I. Froloff Signed-off-by: Rafael J. Wysocki Stable-dep-of: 021a67d09615 ("ACPI: resource: Add MAIBENBEN X577 to irq1_edge_low_force_override") Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 35a98a5916f63..8420d97287f86 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -555,6 +555,20 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "GM5RGEE0016COM"), }, }, + { + /* Lunnen Ground 15 / AMD Ryzen 5 5500U */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lunnen"), + DMI_MATCH(DMI_BOARD_NAME, "LLL5DAW"), + }, + }, + { + /* Lunnen Ground 16 / AMD Ryzen 7 5800U */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lunnen"), + DMI_MATCH(DMI_BOARD_NAME, "LL6FA"), + }, + }, { } }; -- GitLab From ed86e1fa7bad5eca03fae1673c122dfd3115678b Mon Sep 17 00:00:00 2001 From: Maxim Kudinov Date: Fri, 23 Feb 2024 19:24:08 +0300 Subject: [PATCH 0918/2290] ACPI: resource: Add MAIBENBEN X577 to irq1_edge_low_force_override [ Upstream commit 021a67d096154893cd1d883c7be0097e2ee327fd ] A known issue on some Zen laptops, keyboard stopped working due to commit 9946e39fe8d0 fael@kernel.org("ACPI: resource: skip IRQ override on AMD Zen platforms") on kernel 5.19.10. The ACPI IRQ override is required for this board due to buggy DSDT, thus adding the board vendor and name to irq1_edge_low_force_override fixes the issue. Fixes: 9946e39fe8d0 ("ACPI: resource: skip IRQ override on AMD Zen platforms") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217394 Link: https://lore.kernel.org/linux-acpi/20231006123304.32686-1-hdegoede@redhat.com/ Tested-by: Maxim Trofimov Signed-off-by: Maxim Kudinov Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8420d97287f86..1c5c1a269fbee 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -569,6 +569,13 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "LL6FA"), }, }, + { + /* MAIBENBEN X577 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MAIBENBEN"), + DMI_MATCH(DMI_BOARD_NAME, "X577"), + }, + }, { } }; -- GitLab From 5bd963ff48e899c9f09d24c5ea58a77d65e4c96e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Feb 2024 17:35:27 +0100 Subject: [PATCH 0919/2290] ACPI: scan: Fix device check notification handling [ Upstream commit 793551c965116d9dfaf0550dacae1396a20efa69 ] It is generally invalid to fail a Device Check notification if the scan handler has not been attached to the given device after a bus rescan, because there may be valid reasons for the scan handler to refuse attaching to the device (for example, the device is not ready). For this reason, modify acpi_scan_device_check() to return 0 in that case without printing a warning. While at it, reduce the log level of the "already enumerated" message in the same function, because it is only interesting when debugging notification handling Fixes: 443fc8202272 ("ACPI / hotplug: Rework generic code to handle suprise removals") Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/acpi/scan.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 94154a849a3ea..293cdf486fd81 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -315,18 +315,14 @@ static int acpi_scan_device_check(struct acpi_device *adev) * again). */ if (adev->handler) { - dev_warn(&adev->dev, "Already enumerated\n"); - return -EALREADY; + dev_dbg(&adev->dev, "Already enumerated\n"); + return 0; } error = acpi_bus_scan(adev->handle); if (error) { dev_warn(&adev->dev, "Namespace scan failure\n"); return error; } - if (!adev->handler) { - dev_warn(&adev->dev, "Enumeration failure\n"); - error = -ENODEV; - } } else { error = acpi_scan_device_not_present(adev); } -- GitLab From 9e8486e46f8b0ef4a7d11c3402c2cea9b073ee13 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 27 Feb 2024 18:35:25 +0100 Subject: [PATCH 0920/2290] arm64: dts: rockchip: add missing interrupt-names for rk356x vdpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1c44d9afa6f89aa0e10a191f30868eb12cd719f ] The video-codec@fdea0400 was missing the interrupt-names property that is part of the binding. Add it. Fixes: 944be6fba401 ("arm64: dts: rockchip: Add VPU support for RK3568/RK3566") Cc: Piotr Oniszczuk Acked-by: Uwe Kleine-König Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240227173526.710056-1-heiko@sntech.de Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index f4d6dbbbddcd4..99ad6fc51b584 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -596,6 +596,7 @@ compatible = "rockchip,rk3568-vpu"; reg = <0x0 0xfdea0000 0x0 0x800>; interrupts = ; + interrupt-names = "vdpu"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; clock-names = "aclk", "hclk"; iommus = <&vdpu_mmu>; -- GitLab From 47635b112a64b7b208224962471e7e42f110e723 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 27 Feb 2024 09:51:12 -0800 Subject: [PATCH 0921/2290] x86, relocs: Ignore relocations in .notes section [ Upstream commit aaa8736370db1a78f0e8434344a484f9fd20be3b ] When building with CONFIG_XEN_PV=y, .text symbols are emitted into the .notes section so that Xen can find the "startup_xen" entry point. This information is used prior to booting the kernel, so relocations are not useful. In fact, performing relocations against the .notes section means that the KASLR base is exposed since /sys/kernel/notes is world-readable. To avoid leaking the KASLR base without breaking unprivileged tools that are expecting to read /sys/kernel/notes, skip performing relocations in the .notes section. The values readable in .notes are then identical to those found in System.map. Reported-by: Guixiong Wei Closes: https://lore.kernel.org/all/20240218073501.54555-1-guixiongwei@gmail.com/ Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Fixes: da1a679cde9b ("Add /sys/kernel/notes") Reviewed-by: Juergen Gross Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- arch/x86/tools/relocs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 2925074b9a588..9a5b101c45023 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -653,6 +653,14 @@ static void print_absolute_relocs(void) if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } + /* + * Do not perform relocations in .notes section; any + * values there are meant for pre-boot consumption (e.g. + * startup_xen). + */ + if (sec_applies->shdr.sh_type == SHT_NOTE) { + continue; + } sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { -- GitLab From 934212a623cbab851848b6de377eb476718c3e4c Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Tue, 2 Jan 2024 13:38:13 +0800 Subject: [PATCH 0922/2290] SUNRPC: fix some memleaks in gssx_dec_option_array [ Upstream commit 3cfcfc102a5e57b021b786a755a38935e357797d ] The creds and oa->data need to be freed in the error-handling paths after their allocation. So this patch add these deallocations in the corresponding paths. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Signed-off-by: Zhipeng Lu Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index d79f12c2550ac..cb32ab9a83952 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { - kfree(oa->data); - return -ENOMEM; + err = -ENOMEM; + goto free_oa; } oa->data[0].option.data = CREDS_VALUE; @@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, /* option buffer */ p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - return -ENOSPC; + if (unlikely(p == NULL)) { + err = -ENOSPC; + goto free_creds; + } length = be32_to_cpup(p); p = xdr_inline_decode(xdr, length); - if (unlikely(p == NULL)) - return -ENOSPC; + if (unlikely(p == NULL)) { + err = -ENOSPC; + goto free_creds; + } if (length == sizeof(CREDS_VALUE) && memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) { /* We have creds here. parse them */ err = gssx_dec_linux_creds(xdr, creds); if (err) - return err; + goto free_creds; oa->data[0].value.len = 1; /* presence */ } else { /* consume uninteresting buffer */ err = gssx_dec_buffer(xdr, &dummy); if (err) - return err; + goto free_creds; } } return 0; + +free_creds: + kfree(creds); +free_oa: + kfree(oa->data); + oa->data = NULL; + return err; } static int gssx_dec_status(struct xdr_stream *xdr, -- GitLab From 216712c69846f197cba22e8359825033c283abd2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 26 Feb 2024 22:37:39 +0100 Subject: [PATCH 0923/2290] mmc: wmt-sdmmc: remove an incorrect release_mem_region() call in the .remove function [ Upstream commit ae5004a40a262d329039b99b62bd3fe7645b66ad ] This looks strange to call release_mem_region() in a remove function without any request_mem_region() in the probe or "struct resource" somewhere. So remove the corresponding code. Fixes: 3a96dff0f828 ("mmc: SD/MMC Host Controller for Wondermedia WM8505/WM8650") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/bb0bb1ed1e18de55e8c0547625bde271e64b8c31.1708983064.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/wmt-sdmmc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c index 9aa3027ca25e4..f2abebb2d8574 100644 --- a/drivers/mmc/host/wmt-sdmmc.c +++ b/drivers/mmc/host/wmt-sdmmc.c @@ -886,7 +886,6 @@ static int wmt_mci_remove(struct platform_device *pdev) { struct mmc_host *mmc; struct wmt_mci_priv *priv; - struct resource *res; u32 reg_tmp; mmc = platform_get_drvdata(pdev); @@ -914,9 +913,6 @@ static int wmt_mci_remove(struct platform_device *pdev) clk_disable_unprepare(priv->clk_sdmmc); clk_put(priv->clk_sdmmc); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - mmc_free_host(mmc); dev_info(&pdev->dev, "WMT MCI device removed\n"); -- GitLab From 33fb18efaedef32d282c00596fa43ec906ccdd97 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 8 Feb 2024 11:46:28 +0800 Subject: [PATCH 0924/2290] ACPI: CPPC: enable AMD CPPC V2 support for family 17h processors [ Upstream commit a51ab63b297ce9e26e3ffb9be896018a42d5f32f ] As there are some AMD processors which only support CPPC V2 firmware and BIOS implementation, the amd_pstate driver will be failed to load when system booting with below kernel warning message: [ 0.477523] amd_pstate: the _CPC object is not present in SBIOS or ACPI disabled To make the amd_pstate driver can be loaded on those TR40 processors, it needs to match x86_model from 0x30 to 0x7F for family 17H. With the change, the system can load amd_pstate driver as expected. Reviewed-by: Mario Limonciello Reported-by: Gino Badouri Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218171 Fixes: fbd74d1689 ("ACPI: CPPC: Fix enabling CPPC on AMD systems with shared memory") Signed-off-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- arch/x86/kernel/acpi/cppc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8d8752b44f113..ff8f25faca3dd 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -20,7 +20,7 @@ bool cpc_supported_by_cpu(void) (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) return true; else if (boot_cpu_data.x86 == 0x17 && - boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + boot_cpu_data.x86_model >= 0x30 && boot_cpu_data.x86_model <= 0x7f) return true; return boot_cpu_has(X86_FEATURE_CPPC); } -- GitLab From 2c727f83eb065b87cce306024b3b57d245b5129e Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 1 Mar 2024 00:35:09 +0200 Subject: [PATCH 0925/2290] wifi: rtw88: 8821c: Fix beacon loss and disconnect [ Upstream commit e1dfa21427baeb813f9a2f9ceab6b7d32c3ca425 ] Tenda U9 V2.0, which contains RTL8811CU, is practically unusable because of frequent disconnections: Feb 23 14:46:45 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-BEACON-LOSS Feb 23 14:46:46 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-DISCONNECTED bssid=90:55:de:__:__:__ reason=4 locally_generated=1 Feb 23 14:46:52 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-CONNECTED - Connection to 90:55:de:__:__:__ completed [id=0 id_str=] Feb 23 14:46:54 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-BEACON-LOSS Feb 23 14:46:55 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-DISCONNECTED bssid=90:55:de:__:__:__ reason=4 locally_generated=1 Feb 23 14:47:01 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-CONNECTED - Connection to 90:55:de:__:__:__ completed [id=0 id_str=] Feb 23 14:47:04 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-BEACON-LOSS Feb 23 14:47:05 ideapad2 wpa_supplicant[427]: wlp3s0f3u2: CTRL-EVENT-DISCONNECTED bssid=90:55:de:__:__:__ reason=4 locally_generated=1 This is caused by a mistake in the chip initialisation. This version of the chip requires loading an extra AGC table right after the main one, but the extra table is being loaded at the wrong time, in rtw_chip_board_info_setup(). Move the extra AGC table loading to the right place, in rtw_phy_load_tables(). The rtw_chip_board_info_setup() can only do "software" things, and rtw_phy_load_tables() can really do IO. Fixes: 5d6651fe8583 ("rtw88: 8821c: support RFE type2 wifi NIC") Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/276c31d8-b9a8-4e54-a3ac-09b74657aff7@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/main.c | 2 -- drivers/net/wireless/realtek/rtw88/phy.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 4c8164db4a9e4..81f3112923f1c 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1989,8 +1989,6 @@ static int rtw_chip_board_info_setup(struct rtw_dev *rtwdev) rtw_phy_setup_phy_cond(rtwdev, 0); rtw_phy_init_tx_power(rtwdev); - if (rfe_def->agc_btg_tbl) - rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_table(rtwdev, rfe_def->phy_pg_tbl); rtw_load_table(rtwdev, rfe_def->txpwr_lmt_tbl); rtw_phy_tx_power_by_rate_config(hal); diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c index bd7d05e080848..fde7b532bc07e 100644 --- a/drivers/net/wireless/realtek/rtw88/phy.c +++ b/drivers/net/wireless/realtek/rtw88/phy.c @@ -1761,12 +1761,15 @@ static void rtw_load_rfk_table(struct rtw_dev *rtwdev) void rtw_phy_load_tables(struct rtw_dev *rtwdev) { + const struct rtw_rfe_def *rfe_def = rtw_get_rfe_def(rtwdev); const struct rtw_chip_info *chip = rtwdev->chip; u8 rf_path; rtw_load_table(rtwdev, chip->mac_tbl); rtw_load_table(rtwdev, chip->bb_tbl); rtw_load_table(rtwdev, chip->agc_tbl); + if (rfe_def->agc_btg_tbl) + rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_rfk_table(rtwdev); for (rf_path = 0; rf_path < rtwdev->hal.rf_path_num; rf_path++) { -- GitLab From eeaa98f34d84b2f07d2b23ddec118aa409d9b7eb Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 1 Mar 2024 00:35:58 +0200 Subject: [PATCH 0926/2290] wifi: rtw88: 8821c: Fix false alarm count [ Upstream commit c238adbc578eeb70cbc8fdd1bef3666b0f585b13 ] total_fa_cnt is supposed to include cck_fa_cnt and ofdm_fa_cnt, not just ofdm_fa_cnt. Fixes: 960361238b86 ("rtw88: 8821c: add false alarm statistics") Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/f3cb6d17-e4e4-44a7-9c9b-72aed994b5c9@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 609a2b86330d8..50e3e46f7d8aa 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -674,9 +674,9 @@ static void rtw8821c_false_alarm_statistics(struct rtw_dev *rtwdev) dm_info->cck_fa_cnt = cck_fa_cnt; dm_info->ofdm_fa_cnt = ofdm_fa_cnt; + dm_info->total_fa_cnt = ofdm_fa_cnt; if (cck_enable) dm_info->total_fa_cnt += cck_fa_cnt; - dm_info->total_fa_cnt = ofdm_fa_cnt; crc32_cnt = rtw_read32(rtwdev, REG_CRC_CCK); dm_info->cck_ok_cnt = FIELD_GET(GENMASK(15, 0), crc32_cnt); -- GitLab From 48fba9d7f5716f9ed7f6b5b914bc191e3ee4d805 Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:14 +0800 Subject: [PATCH 0927/2290] PCI: Make pci_dev_is_disconnected() helper public for other drivers [ Upstream commit 39714fd73c6b60a8d27bcc5b431afb0828bf4434 ] Make pci_dev_is_disconnected() public so that it can be called from Intel VT-d driver to quickly fix/workaround the surprise removal unplug hang issue for those ATS capable devices on PCIe switch downstream hotplug capable ports. Beside pci_device_is_present() function, this one has no config space space access, so is light enough to optimize the normal pure surprise removal and safe removal flow. Acked-by: Bjorn Helgaas Reviewed-by: Dan Carpenter Tested-by: Haorong Ye Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-2-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Stable-dep-of: 4fc82cd907ac ("iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected") Signed-off-by: Sasha Levin --- drivers/pci/pci.h | 5 ----- include/linux/pci.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index e1d02b7c60294..9950deeb047a7 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -357,11 +357,6 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) return 0; } -static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) -{ - return dev->error_state == pci_channel_io_perm_failure; -} - /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 diff --git a/include/linux/pci.h b/include/linux/pci.h index eccaf1abea79d..f5d89a4b811f1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2355,6 +2355,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, -- GitLab From 34a7b30f56d30114bf4d436e4dc793afe326fbcf Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Tue, 5 Mar 2024 20:21:15 +0800 Subject: [PATCH 0928/2290] iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected [ Upstream commit 4fc82cd907ac075648789cc3a00877778aa1838b ] For those endpoint devices connect to system via hotplug capable ports, users could request a hot reset to the device by flapping device's link through setting the slot's link control register, as pciehp_ist() DLLSC interrupt sequence response, pciehp will unload the device driver and then power it off. thus cause an IOMMU device-TLB invalidation (Intel VT-d spec, or ATS Invalidation in PCIe spec r6.1) request for non-existence target device to be sent and deadly loop to retry that request after ITE fault triggered in interrupt context. That would cause following continuous hard lockup warning and system hang [ 4211.433662] pcieport 0000:17:01.0: pciehp: Slot(108): Link Down [ 4211.433664] pcieport 0000:17:01.0: pciehp: Slot(108): Card not present [ 4223.822591] NMI watchdog: Watchdog detected hard LOCKUP on cpu 144 [ 4223.822622] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S OE kernel version xxxx [ 4223.822623] Hardware name: vendorname xxxx 666-106, BIOS 01.01.02.03.01 05/15/2023 [ 4223.822623] RIP: 0010:qi_submit_sync+0x2c0/0x490 [ 4223.822624] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 1 0 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 [ 4223.822624] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 [ 4223.822625] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 [ 4223.822625] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 [ 4223.822625] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 [ 4223.822626] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 [ 4223.822626] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 [ 4223.822626] FS: 0000000000000000(0000) GS:ffffa237ae400000(0000) knlGS:0000000000000000 [ 4223.822627] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4223.822627] CR2: 00007ffe86515d80 CR3: 000002fd3000a001 CR4: 0000000000770ee0 [ 4223.822627] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4223.822628] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 4223.822628] PKRU: 55555554 [ 4223.822628] Call Trace: [ 4223.822628] qi_flush_dev_iotlb+0xb1/0xd0 [ 4223.822628] __dmar_remove_one_dev_info+0x224/0x250 [ 4223.822629] dmar_remove_one_dev_info+0x3e/0x50 [ 4223.822629] intel_iommu_release_device+0x1f/0x30 [ 4223.822629] iommu_release_device+0x33/0x60 [ 4223.822629] iommu_bus_notifier+0x7f/0x90 [ 4223.822630] blocking_notifier_call_chain+0x60/0x90 [ 4223.822630] device_del+0x2e5/0x420 [ 4223.822630] pci_remove_bus_device+0x70/0x110 [ 4223.822630] pciehp_unconfigure_device+0x7c/0x130 [ 4223.822631] pciehp_disable_slot+0x6b/0x100 [ 4223.822631] pciehp_handle_presence_or_link_change+0xd8/0x320 [ 4223.822631] pciehp_ist+0x176/0x180 [ 4223.822631] ? irq_finalize_oneshot.part.50+0x110/0x110 [ 4223.822632] irq_thread_fn+0x19/0x50 [ 4223.822632] irq_thread+0x104/0x190 [ 4223.822632] ? irq_forced_thread_fn+0x90/0x90 [ 4223.822632] ? irq_thread_check_affinity+0xe0/0xe0 [ 4223.822633] kthread+0x114/0x130 [ 4223.822633] ? __kthread_cancel_work+0x40/0x40 [ 4223.822633] ret_from_fork+0x1f/0x30 [ 4223.822633] Kernel panic - not syncing: Hard LOCKUP [ 4223.822634] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S OE kernel version xxxx [ 4223.822634] Hardware name: vendorname xxxx 666-106, BIOS 01.01.02.03.01 05/15/2023 [ 4223.822634] Call Trace: [ 4223.822634] [ 4223.822635] dump_stack+0x6d/0x88 [ 4223.822635] panic+0x101/0x2d0 [ 4223.822635] ? ret_from_fork+0x11/0x30 [ 4223.822635] nmi_panic.cold.14+0xc/0xc [ 4223.822636] watchdog_overflow_callback.cold.8+0x6d/0x81 [ 4223.822636] __perf_event_overflow+0x4f/0xf0 [ 4223.822636] handle_pmi_common+0x1ef/0x290 [ 4223.822636] ? __set_pte_vaddr+0x28/0x40 [ 4223.822637] ? flush_tlb_one_kernel+0xa/0x20 [ 4223.822637] ? __native_set_fixmap+0x24/0x30 [ 4223.822637] ? ghes_copy_tofrom_phys+0x70/0x100 [ 4223.822637] ? __ghes_peek_estatus.isra.16+0x49/0xa0 [ 4223.822637] intel_pmu_handle_irq+0xba/0x2b0 [ 4223.822638] perf_event_nmi_handler+0x24/0x40 [ 4223.822638] nmi_handle+0x4d/0xf0 [ 4223.822638] default_do_nmi+0x49/0x100 [ 4223.822638] exc_nmi+0x134/0x180 [ 4223.822639] end_repeat_nmi+0x16/0x67 [ 4223.822639] RIP: 0010:qi_submit_sync+0x2c0/0x490 [ 4223.822639] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 10 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 [ 4223.822640] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 [ 4223.822640] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 [ 4223.822640] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 [ 4223.822641] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 [ 4223.822641] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 [ 4223.822641] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 [ 4223.822641] ? qi_submit_sync+0x2c0/0x490 [ 4223.822642] ? qi_submit_sync+0x2c0/0x490 [ 4223.822642] [ 4223.822642] qi_flush_dev_iotlb+0xb1/0xd0 [ 4223.822642] __dmar_remove_one_dev_info+0x224/0x250 [ 4223.822643] dmar_remove_one_dev_info+0x3e/0x50 [ 4223.822643] intel_iommu_release_device+0x1f/0x30 [ 4223.822643] iommu_release_device+0x33/0x60 [ 4223.822643] iommu_bus_notifier+0x7f/0x90 [ 4223.822644] blocking_notifier_call_chain+0x60/0x90 [ 4223.822644] device_del+0x2e5/0x420 [ 4223.822644] pci_remove_bus_device+0x70/0x110 [ 4223.822644] pciehp_unconfigure_device+0x7c/0x130 [ 4223.822644] pciehp_disable_slot+0x6b/0x100 [ 4223.822645] pciehp_handle_presence_or_link_change+0xd8/0x320 [ 4223.822645] pciehp_ist+0x176/0x180 [ 4223.822645] ? irq_finalize_oneshot.part.50+0x110/0x110 [ 4223.822645] irq_thread_fn+0x19/0x50 [ 4223.822646] irq_thread+0x104/0x190 [ 4223.822646] ? irq_forced_thread_fn+0x90/0x90 [ 4223.822646] ? irq_thread_check_affinity+0xe0/0xe0 [ 4223.822646] kthread+0x114/0x130 [ 4223.822647] ? __kthread_cancel_work+0x40/0x40 [ 4223.822647] ret_from_fork+0x1f/0x30 [ 4223.822647] Kernel Offset: 0x6400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Such issue could be triggered by all kinds of regular surprise removal hotplug operation. like: 1. pull EP(endpoint device) out directly. 2. turn off EP's power. 3. bring the link down. etc. this patch aims to work for regular safe removal and surprise removal unplug. these hot unplug handling process could be optimized for fix the ATS Invalidation hang issue by calling pci_dev_is_disconnected() in function devtlb_invalidation_with_pasid() to check target device state to avoid sending meaningless ATS Invalidation request to iommu when device is gone. (see IMPLEMENTATION NOTE in PCIe spec r6.1 section 10.3.1) For safe removal, device wouldn't be removed until the whole software handling process is done, it wouldn't trigger the hard lock up issue caused by too long ATS Invalidation timeout wait. In safe removal path, device state isn't set to pci_channel_io_perm_failure in pciehp_unconfigure_device() by checking 'presence' parameter, calling pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() will return false there, wouldn't break the function. For surprise removal, device state is set to pci_channel_io_perm_failure in pciehp_unconfigure_device(), means device is already gone (disconnected) call pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() will return true to break the function not to send ATS Invalidation request to the disconnected device blindly, thus avoid to trigger further ITE fault, and ITE fault will block all invalidation request to be handled. furthermore retry the timeout request could trigger hard lockup. safe removal (present) & surprise removal (not present) pciehp_ist() pciehp_handle_presence_or_link_change() pciehp_disable_slot() remove_board() pciehp_unconfigure_device(presence) { if (!presence) pci_walk_bus(parent, pci_dev_set_disconnected, NULL); } this patch works for regular safe removal and surprise removal of ATS capable endpoint on PCIe switch downstream ports. Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table interface") Reviewed-by: Dan Carpenter Tested-by: Haorong Ye Signed-off-by: Ethan Zhao Link: https://lore.kernel.org/r/20240301080727.3529832-3-haifeng.zhao@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/pasid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3f03039e5cce5..32432d82d7744 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -435,6 +435,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, if (!info || !info->ats_enabled) return; + if (pci_dev_is_disconnected(to_pci_dev(dev))) + return; + sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; pfsid = info->pfsid; -- GitLab From 8499af0616cf76e6cbe811107e3f5b33bd472041 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 20 Feb 2024 15:57:11 -0800 Subject: [PATCH 0929/2290] igb: Fix missing time sync events [ Upstream commit ee14cc9ea19ba9678177e2224a9c58cce5937c73 ] Fix "double" clearing of interrupts, which can cause external events or timestamps to be missed. The E1000_TSIRC Time Sync Interrupt Cause register can be cleared in two ways, by either reading it or by writing '1' into the specific cause bit. This is documented in section 8.16.1. The following flow was used: 1. read E1000_TSIRC into 'tsicr'; 2. handle the interrupts present into 'tsirc' and mark them in 'ack'; 3. write 'ack' into E1000_TSICR; As both (1) and (3) will clear the interrupt cause, if the same interrupt happens again between (1) and (3) it will be ignored, causing events to be missed. Remove the extra clear in (3). Fixes: 00c65578b47b ("igb: enable internal PPS for the i210") Acked-by: Richard Cochran Signed-off-by: Vinicius Costa Gomes Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 45ce4ed16146e..81d9a5338be5e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6926,44 +6926,31 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 ack = 0, tsicr = rd32(E1000_TSICR); + u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) ptp_clock_event(adapter->ptp_clock, &event); - ack |= TSINTR_SYS_WRAP; } if (tsicr & E1000_TSICR_TXTS) { /* retrieve hardware timestamp */ schedule_work(&adapter->ptp_tx_work); - ack |= E1000_TSICR_TXTS; } - if (tsicr & TSINTR_TT0) { + if (tsicr & TSINTR_TT0) igb_perout(adapter, 0); - ack |= TSINTR_TT0; - } - if (tsicr & TSINTR_TT1) { + if (tsicr & TSINTR_TT1) igb_perout(adapter, 1); - ack |= TSINTR_TT1; - } - if (tsicr & TSINTR_AUTT0) { + if (tsicr & TSINTR_AUTT0) igb_extts(adapter, 0); - ack |= TSINTR_AUTT0; - } - if (tsicr & TSINTR_AUTT1) { + if (tsicr & TSINTR_AUTT1) igb_extts(adapter, 1); - ack |= TSINTR_AUTT1; - } - - /* acknowledge the interrupts */ - wr32(E1000_TSICR, ack); } static irqreturn_t igb_msix_other(int irq, void *data) -- GitLab From e5f04ec4421752afb7707d2c0dfdcc470f548eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Sun, 7 Jan 2024 19:02:47 +0100 Subject: [PATCH 0930/2290] Bluetooth: Remove HCI_POWER_OFF_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 968667f2e0345a67a6eea5a502f4659085666564 ] With commit cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED"), the power off sequence got refactored so that this timeout was no longer necessary, let's remove the leftover define from the header too. Fixes: cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a674221d151db..c69e09909449f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -416,7 +416,6 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ -#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ -- GitLab From 8beed376c9195427c750a0072ca4c323b0e9b016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Sun, 7 Jan 2024 19:02:48 +0100 Subject: [PATCH 0931/2290] Bluetooth: mgmt: Remove leftover queuing of power_off work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fee054b7579fe252f8b9e6c17b9c5bfdaa84dd7e ] Queuing of power_off work was introduced in these functions with commits 8b064a3ad377 ("Bluetooth: Clean up HCI state when doing power off") and c9910d0fb4fc ("Bluetooth: Fix disconnecting connections in non-connected states") in an effort to clean up state and do things like disconnecting devices before actually powering off the device. After that, commit a3172b7eb4a2 ("Bluetooth: Add timer to force power off") introduced a timeout to ensure that the device actually got powered off, even if some of the cleanup work would never complete. This code later got refactored with commit cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED"), which made powering off the device synchronous and removed the need for initiating the power_off work from other places. The timeout mentioned above got removed too, because we now also made use of the command timeout during power on/off. These days the power_off work still exists, but it only seems to only be used for HCI_AUTO_OFF functionality, which is why we never noticed those two leftover places where we queue power_off work. So let's remove that code. Fixes: cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ab63f807e3c80..a80bf9c42c2ef 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9766,14 +9766,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - if (!mgmt_connected) return; @@ -9830,14 +9822,6 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, { struct mgmt_ev_connect_failed ev; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.status = mgmt_status(status); -- GitLab From 653a17a99d752ffde175d4bc96154f2a3642f400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Mon, 8 Jan 2024 23:46:06 +0100 Subject: [PATCH 0932/2290] Bluetooth: Remove superfluous call to hci_conn_check_pending() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 78e3639fc8031275010c3287ac548c0bc8de83b1 ] The "pending connections" feature was originally introduced with commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests") and 6bd57416127e ("[Bluetooth] Handling pending connect attempts after inquiry") to handle controllers supporting only a single connection request at a time. Later things were extended to also cancel ongoing inquiries on connect() with commit 89e65975fea5 ("Bluetooth: Cancel Inquiry before Create Connection"). With commit a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes"), hci_conn_check_pending() was introduced as a helper to consolidate a few places where we check for pending connections (indicated by the BT_CONNECT2 flag) and then try to connect. This refactoring commit also snuck in two more calls to hci_conn_check_pending(): - One is in the failure callback of hci_cs_inquiry(), this one probably makes sense: If we send an "HCI Inquiry" command and then immediately after a "Create Connection" command, the "Create Connection" command might fail before the "HCI Inquiry" command, and then we want to retry the "Create Connection" on failure of the "HCI Inquiry". - The other added call to hci_conn_check_pending() is in the event handler for the "Remote Name" event, this seems unrelated and is possibly a copy-paste error, so remove that one. Fixes: a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 452d839c152fc..0cd093ec6486c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3567,8 +3567,6 @@ static void hci_remote_name_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - hci_conn_check_pending(hdev); - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); -- GitLab From a96738eb215f4f685b16f7f19532fd0342dfdb51 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 8 Feb 2024 17:40:17 +0100 Subject: [PATCH 0933/2290] Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional() [ Upstream commit 56d074d26c5828773b00b2185dd7e1d08273b8e8 ] The optional variants for the gpiod_get() family of functions return NULL if the GPIO in question is not associated with this device. They return ERR_PTR() on any other error. NULL descriptors are graciously handled by GPIOLIB and can be safely passed to any of the GPIO consumer interfaces as they will return 0 and act as if the function succeeded. If one is using the optional variant, then there's no point in checking for NULL. Fixes: 6845667146a2 ("Bluetooth: hci_qca: Fix NULL vs IS_ERR_OR_NULL check in qca_serdev_probe") Signed-off-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8bfef7f81b417..2acda547f4f3e 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2254,7 +2254,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && + if (IS_ERR(qcadev->bt_en) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); @@ -2263,7 +2263,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && + if (IS_ERR(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855 || data->soc_type == QCA_WCN7850)) @@ -2285,7 +2285,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en)) { + if (IS_ERR(qcadev->bt_en)) { dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); power_ctrl_enabled = false; } -- GitLab From 1023de27cd1d0d692e70fe6d6d5cee9fff9b9c84 Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 20 Apr 2023 20:23:36 +0800 Subject: [PATCH 0934/2290] Bluetooth: Cancel sync command before suspend and power off [ Upstream commit f419863588217f76eaf754e1dfce21ea7fcb026d ] Some of the sync commands might take a long time to complete, e.g. LE Create Connection when the peer device isn't responding might take 20 seconds before it times out. If suspend command is issued during this time, it will need to wait for completion since both commands are using the same sync lock. This patch cancel any running sync commands before attempting to suspend or adapter power off. Signed-off-by: Archie Pusaka Reviewed-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 3 +++ net/bluetooth/mgmt.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a8932d449eb63..a7e6ce2e61c5e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2838,6 +2838,9 @@ int hci_suspend_dev(struct hci_dev *hdev) if (mgmt_powering_down(hdev)) return 0; + /* Cancel potentially blocking sync operation before suspend */ + __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); hci_req_sync_unlock(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a80bf9c42c2ef..a657dc1d4ec7a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1401,6 +1401,10 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } + /* Cancel potentially blocking sync operation before power off */ + if (cp->val == 0x00) + __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); -- GitLab From ac7a47aaa7944efc94e4fc23cc438b7bd9cc222c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 21 Apr 2023 11:37:55 -0700 Subject: [PATCH 0935/2290] Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running [ Upstream commit d883a4669a1def6d121ccf5e64ad28260d1c9531 ] This makes sure hci_cmd_sync_queue only queue new work if HCI_RUNNING has been set otherwise there is a risk of commands being sent while turning off. Because hci_cmd_sync_queue can no longer queue work while HCI_RUNNING is not set it cannot be used to power on adapters so instead hci_cmd_sync_submit is introduced which bypass the HCI_RUNNING check, so it behaves like the old implementation. Link: https://lore.kernel.org/all/CAB4PzUpDMvdc8j2MdeSAy1KkAE-D3woprCwAdYWeOc-3v3c9Sw@mail.gmail.com/ Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 2 ++ net/bluetooth/hci_sync.c | 25 +++++++++++++++++++++++-- net/bluetooth/mgmt.c | 12 ++++++++---- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 17f5a4c32f36e..2fa976c466b80 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -41,6 +41,8 @@ void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a337340464567..31dd064d77a42 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -680,8 +680,12 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) } EXPORT_SYMBOL(hci_cmd_sync_cancel); -int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, - void *data, hci_cmd_sync_work_destroy_t destroy) +/* Submit HCI command to be run in as cmd_sync_work: + * + * - hdev must _not_ be unregistered + */ +int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; int err = 0; @@ -711,6 +715,23 @@ unlock: mutex_unlock(&hdev->unregister_lock); return err; } +EXPORT_SYMBOL(hci_cmd_sync_submit); + +/* Queue HCI command: + * + * - hdev must be running + */ +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + /* Only queue command if hdev is running which means it had been opened + * and is either on init phase or is already up. + */ + if (!test_bit(HCI_RUNNING, &hdev->flags)) + return -ENETDOWN; + + return hci_cmd_sync_submit(hdev, func, data, destroy); +} EXPORT_SYMBOL(hci_cmd_sync_queue); int hci_update_eir_sync(struct hci_dev *hdev) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a657dc1d4ec7a..732b6cf45fbe4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1402,11 +1402,15 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, } /* Cancel potentially blocking sync operation before power off */ - if (cp->val == 0x00) + if (cp->val == 0x00) { __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); - - err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, - mgmt_set_powered_complete); + err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, + mgmt_set_powered_complete); + } else { + /* Use hci_cmd_sync_submit since hdev might not be running */ + err = hci_cmd_sync_submit(hdev, set_powered_sync, cmd, + mgmt_set_powered_complete); + } if (err < 0) mgmt_pending_remove(cmd); -- GitLab From 6083089ab00631617f9eac678df3ab050a9d837a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Jun 2023 17:25:06 -0700 Subject: [PATCH 0936/2290] Bluetooth: hci_conn: Consolidate code for aborting connections [ Upstream commit a13f316e90fdb1fb6df6582e845aa9b3270f3581 ] This consolidates code for aborting connections using hci_cmd_sync_queue so it is synchronized with other threads, but because of the fact that some commands may block the cmd_sync_queue while waiting specific events this attempt to cancel those requests by using hci_cmd_sync_cancel. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 154 ++++++------------------------- net/bluetooth/hci_sync.c | 23 +++-- net/bluetooth/mgmt.c | 15 +-- 4 files changed, 47 insertions(+), 147 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 09c978f3d95dc..2538f3b96623b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -739,6 +739,7 @@ struct hci_conn { unsigned long flags; enum conn_reasons conn_reason; + __u8 abort_reason; __u32 clock; __u16 clock_accuracy; @@ -758,7 +759,6 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; - struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 12d36875358b9..f752a9f9bb9c7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -175,57 +175,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } -static void le_scan_cleanup(struct work_struct *work) -{ - struct hci_conn *conn = container_of(work, struct hci_conn, - le_scan_cleanup); - struct hci_dev *hdev = conn->hdev; - struct hci_conn *c = NULL; - - BT_DBG("%s hcon %p", hdev->name, conn); - - hci_dev_lock(hdev); - - /* Check that the hci_conn is still around */ - rcu_read_lock(); - list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { - if (c == conn) - break; - } - rcu_read_unlock(); - - if (c == conn) { - hci_connect_le_scan_cleanup(conn, 0x00); - hci_conn_cleanup(conn); - } - - hci_dev_unlock(hdev); - hci_dev_put(hdev); - hci_conn_put(conn); -} - -static void hci_connect_le_scan_remove(struct hci_conn *conn) -{ - BT_DBG("%s hcon %p", conn->hdev->name, conn); - - /* We can't call hci_conn_del/hci_conn_cleanup here since that - * could deadlock with another hci_conn_del() call that's holding - * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). - * Instead, grab temporary extra references to the hci_dev and - * hci_conn and perform the necessary cleanup in a separate work - * callback. - */ - - hci_dev_hold(conn->hdev); - hci_conn_get(conn); - - /* Even though we hold a reference to the hdev, many other - * things might get cleaned up meanwhile, including the hdev's - * own workqueue, so we can't use that for scheduling. - */ - schedule_work(&conn->le_scan_cleanup); -} - static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -672,13 +621,6 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; - /* LE connections in scanning state need special handling */ - if (conn->state == BT_CONNECT && conn->type == LE_LINK && - test_bit(HCI_CONN_SCANNING, &conn->flags)) { - hci_connect_le_scan_remove(conn); - return; - } - hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } @@ -1050,7 +992,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); - INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -2837,81 +2778,46 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } -int hci_abort_conn(struct hci_conn *conn, u8 reason) +static int abort_conn_sync(struct hci_dev *hdev, void *data) { - int r = 0; + struct hci_conn *conn; + u16 handle = PTR_ERR(data); - if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) return 0; - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - if (conn->type == AMP_LINK) { - struct hci_cp_disconn_phy_link cp; + return hci_abort_conn_sync(hdev, conn, conn->abort_reason); +} - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = reason; - r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp); - } else { - struct hci_cp_disconnect dc; +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + struct hci_dev *hdev = conn->hdev; - dc.handle = cpu_to_le16(conn->handle); - dc.reason = reason; - r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, - sizeof(dc), &dc); - } + /* If abort_reason has already been set it means the connection is + * already being aborted so don't attempt to overwrite it. + */ + if (conn->abort_reason) + return 0; - conn->state = BT_DISCONN; + bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason); - break; - case BT_CONNECT: - if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - break; - r = hci_send_cmd(conn->hdev, - HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); - } else if (conn->type == ACL_LINK) { - if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) - break; - r = hci_send_cmd(conn->hdev, - HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - } - break; - case BT_CONNECT2: - if (conn->type == ACL_LINK) { - struct hci_cp_reject_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = reason; - - r = hci_send_cmd(conn->hdev, - HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - struct hci_cp_reject_sync_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - - /* SCO rejection has its own limited set of - * allowed error values (0x0D-0x0F) which isn't - * compatible with most values passed to this - * function. To be safe hard-code one of the - * values that's suitable for SCO. - */ - rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; + conn->abort_reason = reason; - r = hci_send_cmd(conn->hdev, - HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); + /* If the connection is pending check the command opcode since that + * might be blocking on hci_cmd_sync_work while waiting its respective + * event so we need to hci_cmd_sync_cancel to cancel it. + */ + if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + switch (hci_skb_event(hdev->sent_cmd)) { + case HCI_EV_LE_CONN_COMPLETE: + case HCI_EV_LE_ENHANCED_CONN_COMPLETE: + case HCI_EVT_LE_CIS_ESTABLISHED: + hci_cmd_sync_cancel(hdev, -ECANCELED); + break; } - break; - default: - conn->state = BT_CLOSED; - break; } - return r; + return hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 31dd064d77a42..c03729c10fdd6 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5230,22 +5230,27 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, - struct hci_conn *conn) + struct hci_conn *conn, u8 reason) { + /* Return reason if scanning since the connection shall probably be + * cleanup directly. + */ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - return 0; + return reason; - if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + if (conn->role == HCI_ROLE_SLAVE || + test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } -static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) +static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, + u8 reason) { if (conn->type == LE_LINK) - return hci_le_connect_cancel_sync(hdev, conn); + return hci_le_connect_cancel_sync(hdev, conn, reason); if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; @@ -5298,9 +5303,11 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - err = hci_connect_cancel_sync(hdev, conn); + err = hci_connect_cancel_sync(hdev, conn, reason); /* Cleanup hci_conn object if it cannot be cancelled as it - * likelly means the controller and host stack are out of sync. + * likelly means the controller and host stack are out of sync + * or in case of LE it was still scanning so it can be cleanup + * safely. */ if (err) { hci_dev_lock(hdev); @@ -6215,7 +6222,7 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) done: if (err == -ETIMEDOUT) - hci_le_connect_cancel_sync(hdev, conn); + hci_le_connect_cancel_sync(hdev, conn, 0x00); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 732b6cf45fbe4..fbd859e2d13ca 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3583,18 +3583,6 @@ unlock: return err; } -static int abort_conn_sync(struct hci_dev *hdev, void *data) -{ - struct hci_conn *conn; - u16 handle = PTR_ERR(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - - return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); -} - static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3645,8 +3633,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), - NULL); + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); unlock: hci_dev_unlock(hdev); -- GitLab From 1b6cfa4c760e5f3729e21b725c89f6ddb9be5abf Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 9 Jan 2024 13:45:40 -0500 Subject: [PATCH 0937/2290] Bluetooth: hci_core: Cancel request on command timeout [ Upstream commit 63298d6e752fc0ec7f5093860af8bc9f047b30c8 ] If command has timed out call __hci_cmd_sync_cancel to notify the hci_req since it will inevitably cause a timeout. This also rework the code around __hci_cmd_sync_cancel since it was wrongly assuming it needs to cancel timer as well, but sometimes the timers have not been started or in fact they already had timed out in which case they don't need to be cancel yet again. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 2 +- net/bluetooth/hci_core.c | 86 +++++++++++++++++++++----------- net/bluetooth/hci_request.c | 2 +- net/bluetooth/hci_sync.c | 20 ++++---- net/bluetooth/mgmt.c | 2 +- 5 files changed, 72 insertions(+), 40 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 2fa976c466b80..59d15b1a978ab 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -39,7 +39,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err); int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a7e6ce2e61c5e..edf7af2e13557 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1492,10 +1492,11 @@ static void hci_cmd_timeout(struct work_struct *work) cmd_timer.work); if (hdev->sent_cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); + u16 opcode = hci_skb_opcode(hdev->sent_cmd); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); + + hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } @@ -2822,6 +2823,23 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) return ret; } +/* Cancel ongoing command synchronously: + * + * - Cancel command timer + * - Reset command counter + * - Cancel command request + */ +static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) +{ + bt_dev_dbg(hdev, "err 0x%2.2x", err); + + cancel_delayed_work_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->ncmd_timer); + atomic_set(&hdev->cmd_cnt, 1); + + hci_cmd_sync_cancel_sync(hdev, -err); +} + /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { @@ -2839,7 +2857,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, -EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4119,6 +4137,33 @@ static void hci_rx_work(struct work_struct *work) } } +static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) +{ + int err; + + bt_dev_dbg(hdev, "skb %p", skb); + + kfree_skb(hdev->sent_cmd); + + hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); + if (!hdev->sent_cmd) { + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); + return; + } + + err = hci_send_frame(hdev, skb); + if (err < 0) { + hci_cmd_sync_cancel_sync(hdev, err); + return; + } + + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); + + atomic_dec(&hdev->cmd_cnt); +} + static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); @@ -4133,30 +4178,15 @@ static void hci_cmd_work(struct work_struct *work) if (!skb) return; - kfree_skb(hdev->sent_cmd); - - hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); - if (hdev->sent_cmd) { - int res; - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); - atomic_dec(&hdev->cmd_cnt); - - res = hci_send_frame(hdev, skb); - if (res < 0) - __hci_cmd_sync_cancel(hdev, -res); - - rcu_read_lock(); - if (test_bit(HCI_RESET, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - cancel_delayed_work(&hdev->cmd_timer); - else - queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, - HCI_CMD_TIMEOUT); - rcu_read_unlock(); - } else { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } + hci_send_cmd_sync(hdev, skb); + + rcu_read_lock(); + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + cancel_delayed_work(&hdev->cmd_timer); + else + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); } } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index f7e006a363829..4468647df6722 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -916,7 +916,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { - __hci_cmd_sync_cancel(hdev, ENODEV); + hci_cmd_sync_cancel_sync(hdev, ENODEV); cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c03729c10fdd6..d0029f10d9023 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -651,7 +651,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) mutex_unlock(&hdev->cmd_sync_work_lock); } -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -659,15 +659,17 @@ void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - cancel_delayed_work_sync(&hdev->cmd_timer); - cancel_delayed_work_sync(&hdev->ncmd_timer); - atomic_set(&hdev->cmd_cnt, 1); - - wake_up_interruptible(&hdev->req_wait_q); + queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } } +EXPORT_SYMBOL(hci_cmd_sync_cancel); -void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +/* Cancel ongoing command request synchronously: + * + * - Set result and mark status to HCI_REQ_CANCELED + * - Wakeup command sync thread + */ +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -675,10 +677,10 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); + wake_up_interruptible(&hdev->req_wait_q); } } -EXPORT_SYMBOL(hci_cmd_sync_cancel); +EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); /* Submit HCI command to be run in as cmd_sync_work: * diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fbd859e2d13ca..4a35535f56607 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1403,7 +1403,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, /* Cancel potentially blocking sync operation before power off */ if (cp->val == 0x00) { - __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN); err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); } else { -- GitLab From da77c1d39bc527b31890bfa0405763c82828defb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 16 Feb 2024 16:20:11 -0500 Subject: [PATCH 0938/2290] Bluetooth: hci_sync: Fix overwriting request callback [ Upstream commit 2615fd9a7c2507eb3be3fbe49dcec88a2f56454a ] In a few cases the stack may generate commands as responses to events which would happen to overwrite the sent_cmd, so this attempts to store the request in req_skb so even if sent_cmd is replaced with a new command the pending request will remain in stored in req_skb. Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 46 ++++++++++++++++++++++---------- net/bluetooth/hci_event.c | 18 ++++++------- net/bluetooth/hci_sync.c | 21 ++++++++++++--- 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2538f3b96623b..6bc6de5345261 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -549,6 +549,7 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; + struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f752a9f9bb9c7..bac5a369d2bef 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2813,7 +2813,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, -ECANCELED); + hci_cmd_sync_cancel(hdev, ECANCELED); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index edf7af2e13557..e0c924df13b58 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1491,8 +1491,8 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); - if (hdev->sent_cmd) { - u16 opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb) { + u16 opcode = hci_skb_opcode(hdev->req_skb); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); @@ -2792,6 +2792,7 @@ void hci_release_dev(struct hci_dev *hdev) ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); + kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } @@ -3121,21 +3122,33 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) { struct hci_command_hdr *hdr; - if (!hdev->sent_cmd) + if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) return NULL; - hdr = (void *) hdev->sent_cmd->data; + hdr = (void *)skb->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); + return skb->data + HCI_COMMAND_HDR_SIZE; +} - return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; +/* Get data from the previously sent command */ +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +{ + void *data; + + /* Check if opcode matches last sent command */ + data = hci_cmd_data(hdev->sent_cmd, opcode); + if (!data) + /* Check if opcode matches last request */ + data = hci_cmd_data(hdev->req_skb, opcode); + + return data; } /* Get data from last received event */ @@ -4031,17 +4044,19 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, if (!status && !hci_req_is_complete(hdev)) return; + skb = hdev->req_skb; + /* If this was the last command in a request the complete - * callback would be found in hdev->sent_cmd instead of the + * callback would be found in hdev->req_skb instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (skb && bt_cb(skb)->hci.req_complete) { + *req_complete = bt_cb(skb)->hci.req_complete; return; } @@ -4158,8 +4173,11 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); + if (hci_req_status_pend(hdev) && + !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { + kfree_skb(hdev->req_skb); + hdev->req_skb = skb_clone(skb, GFP_KERNEL); + } atomic_dec(&hdev->cmd_cnt); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0cd093ec6486c..6b746ab9f6d21 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4329,7 +4329,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of commands there will not be a command * complete event). */ - if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { + if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) { hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { @@ -7147,10 +7147,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); /* Only match event if command OGF is for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && - hci_skb_event(hdev->sent_cmd) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && + hci_skb_event(hdev->req_skb) == ev->subevent) { + *opcode = hci_skb_opcode(hdev->req_skb); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, req_complete_skb); } @@ -7537,10 +7537,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) } /* Only match event if command OGF is not for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && - hci_skb_event(hdev->sent_cmd) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 && + hci_skb_event(hdev->req_skb) == event) { + hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb), status, &req_complete, &req_complete_skb); req_evt = event; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d0029f10d9023..65b2ad34179f8 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -31,6 +31,10 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; + /* Free the request command so it is not used as response */ + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + if (skb) { struct sock *sk = hci_skb_sk(skb); @@ -38,7 +42,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (sk) sock_put(sk); - hdev->req_skb = skb_get(skb); + hdev->req_rsp = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); @@ -186,8 +190,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = 0; hdev->req_result = 0; - skb = hdev->req_skb; - hdev->req_skb = NULL; + skb = hdev->req_rsp; + hdev->req_rsp = NULL; bt_dev_dbg(hdev, "end: err %d", err); @@ -4879,6 +4883,11 @@ int hci_dev_open_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5040,6 +5049,12 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + /* Drop last request */ + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); -- GitLab From 68644bf5ec6baaff40fc39b3529c874bfda709bd Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 Feb 2024 10:49:26 -0500 Subject: [PATCH 0939/2290] Bluetooth: hci_core: Fix possible buffer overflow [ Upstream commit 81137162bfaa7278785b24c1fd2e9e74f082e8e4 ] struct hci_dev_info has a fixed size name[8] field so in the event that hdev->name is bigger than that strcpy would attempt to write past its size, so this fixes this problem by switching to use strscpy. Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e0c924df13b58..88e9d7e0865a2 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -908,7 +908,7 @@ int hci_get_dev_info(void __user *arg) else flags = hdev->flags; - strcpy(di.name, hdev->name); + strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; -- GitLab From cb8adca52f306563d958a863bb0cbae9c184d1ae Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 1 Mar 2024 12:58:11 -0500 Subject: [PATCH 0940/2290] Bluetooth: af_bluetooth: Fix deadlock [ Upstream commit f7b94bdc1ec107c92262716b073b3e816d4784fb ] Attemting to do sock_lock on .recvmsg may cause a deadlock as shown bellow, so instead of using sock_sock this uses sk_receive_queue.lock on bt_sock_ioctl to avoid the UAF: INFO: task kworker/u9:1:121 blocked for more than 30 seconds. Not tainted 6.7.6-lemon #183 Workqueue: hci0 hci_rx_work Call Trace: __schedule+0x37d/0xa00 schedule+0x32/0xe0 __lock_sock+0x68/0xa0 ? __pfx_autoremove_wake_function+0x10/0x10 lock_sock_nested+0x43/0x50 l2cap_sock_recv_cb+0x21/0xa0 l2cap_recv_frame+0x55b/0x30a0 ? psi_task_switch+0xeb/0x270 ? finish_task_switch.isra.0+0x93/0x2a0 hci_rx_work+0x33a/0x3f0 process_one_work+0x13a/0x2f0 worker_thread+0x2f0/0x410 ? __pfx_worker_thread+0x10/0x10 kthread+0xe0/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 2e07e8348ea4 ("Bluetooth: af_bluetooth: Fix Use-After-Free in bt_sock_recvmsg") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/af_bluetooth.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f1b7510359e4b..3f9ff02baafe3 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -264,14 +264,11 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - lock_sock(sk); - skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - release_sock(sk); return err; } @@ -297,8 +294,6 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - release_sock(sk); - if (flags & MSG_TRUNC) copied = skblen; @@ -521,10 +516,11 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - lock_sock(sk); + spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; - release_sock(sk); + spin_unlock(&sk->sk_receive_queue.lock); + err = put_user(amount, (int __user *)arg); break; -- GitLab From 715264ad09fd4004e347cdb79fa58a4f2344f13f Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 2 Mar 2024 19:06:23 +0200 Subject: [PATCH 0941/2290] Bluetooth: fix use-after-free in accessing skb after sending it [ Upstream commit 947ec0d002dce8577b655793dcc6fc78d67b7cb6 ] hci_send_cmd_sync first sends skb and then tries to clone it. However, the driver may have already freed the skb at that point. Fix by cloning the sent_cmd cloned just above, instead of the original. Log: ================================================================ BUG: KASAN: slab-use-after-free in __copy_skb_header+0x1a/0x240 ... Call Trace: .. __skb_clone+0x59/0x2c0 hci_cmd_work+0x3b3/0x3d0 [bluetooth] process_one_work+0x459/0x900 ... Allocated by task 129: ... __alloc_skb+0x1ae/0x220 __hci_cmd_sync_sk+0x44c/0x7a0 [bluetooth] __hci_cmd_sync_status+0x24/0xb0 [bluetooth] set_cig_params_sync+0x778/0x7d0 [bluetooth] ... Freed by task 0: ... kmem_cache_free+0x157/0x3c0 __usb_hcd_giveback_urb+0x11e/0x1e0 usb_giveback_urb_bh+0x1ad/0x2a0 tasklet_action_common.isra.0+0x259/0x4a0 __do_softirq+0x15b/0x5a7 ================================================================ Fixes: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 88e9d7e0865a2..70f24dc75b596 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4176,7 +4176,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) if (hci_req_status_pend(hdev) && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(skb, GFP_KERNEL); + hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } atomic_dec(&hdev->cmd_cnt); -- GitLab From 9c402819620a842cbfe39359a3ddfaac9adc8384 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 5 Mar 2024 07:59:27 +0000 Subject: [PATCH 0942/2290] sr9800: Add check for usbnet_get_endpoints [ Upstream commit 07161b2416f740a2cb87faa5566873f401440a61 ] Add check for usbnet_get_endpoints() and return the error if it fails in order to transfer the error. Signed-off-by: Chen Ni Reviewed-by: Simon Horman Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Link: https://lore.kernel.org/r/20240305075927.261284-1-nichen@iscas.ac.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/sr9800.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index f5e19f3ef6cdd..4de5144821835 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -737,7 +737,9 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) data->eeprom_len = SR9800_EEPROM_LEN; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* LED Setting Rule : * AABB:CCDD -- GitLab From 98a2feb8ece65f77e80e9b730f60d744f070b6fb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 2 Mar 2024 20:22:09 +0100 Subject: [PATCH 0943/2290] s390/cache: prevent rebuild of shared_cpu_list [ Upstream commit cb0cd4ee11142339f2d47eef6db274290b7a482d ] With commit 36bbc5b4ffab ("cacheinfo: Allow early detection and population of cache attributes") the shared cpu list for each cache level higher than L1 is rebuilt even if the list already has been set up. This is caused by the removal of the cpumask_empty() check within cache_shared_cpu_map_setup(). However architectures can enforce that the shared cpu list is not rebuilt by simply setting cpu_map_populated of the per cpu cache info structure to true, which is also the fix for this problem. Before: $ cat /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list 0-7 After: $ cat /sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list 1 Fixes: 36bbc5b4ffab ("cacheinfo: Allow early detection and population of cache attributes") Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 7ee3651d00abe..732024ca005ad 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, pvt, ctype, level, cpu); } } + this_cpu_ci->cpu_map_populated = true; return 0; } -- GitLab From edf7990baa48de5097daa9ac02e06cb4c798a737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:35 +0100 Subject: [PATCH 0944/2290] bpf: Fix DEVMAP_HASH overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 281d464a34f540de166cee74b723e97ac2515ec3 ] The devmap code allocates a number hash buckets equal to the next power of two of the max_entries value provided when creating the map. When rounding up to the next power of two, the 32-bit variable storing the number of buckets can overflow, and the code checks for overflow by checking if the truncated 32-bit value is equal to 0. However, on 32-bit arches the rounding up itself can overflow mid-way through, because it ends up doing a left-shift of 32 bits on an unsigned long value. If the size of an unsigned long is four bytes, this is undefined behaviour, so there is no guarantee that we'll end up with a nice and tidy 0-value at the end. Syzbot managed to turn this into a crash on arm32 by creating a DEVMAP_HASH with max_entries > 0x80000000 and then trying to update it. Fix this by moving the overflow check to before the rounding up operation. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Link: https://lore.kernel.org/r/000000000000ed666a0611af6818@google.com Reported-and-tested-by: syzbot+8cd36f6b65f3cafd400a@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Message-ID: <20240307120340.99577-2-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/devmap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f9a87dcc5535b..e051cbb07dac0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -131,13 +131,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) bpf_map_init_from_attr(&dtab->map, attr); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); - - if (!dtab->n_buckets) /* Overflow check */ + /* hash table size must be power of 2; roundup_pow_of_two() can + * overflow into UB on 32-bit arches, so check that first + */ + if (dtab->map.max_entries > 1UL << 31) return -EINVAL; - } - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) -- GitLab From a83fdaeaea3677b83a53f72ace2d73a19bcd6d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:36 +0100 Subject: [PATCH 0945/2290] bpf: Fix hashtab overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6787d916c2cf9850c97a0a3f73e08c43e7d973b1 ] The hashtab code relies on roundup_pow_of_two() to compute the number of hash buckets, and contains an overflow check by checking if the resulting value is 0. However, on 32-bit arches, the roundup code itself can overflow by doing a 32-bit left-shift of an unsigned long value, which is undefined behaviour, so it is not guaranteed to truncate neatly. This was triggered by syzbot on the DEVMAP_HASH type, which contains the same check, copied from the hashtab code. So apply the same fix to hashtab, by moving the overflow check to before the roundup. Fixes: daaf427c6ab3 ("bpf: fix arraymap NULL deref and missing overflow and zero size checks") Signed-off-by: Toke Høiland-Jørgensen Message-ID: <20240307120340.99577-3-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/hashtab.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 88c71de0a0a95..0c74cc9012d5c 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -495,7 +495,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) num_possible_cpus()); } - /* hash table size must be power of 2 */ + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + err = -E2BIG; + if (htab->map.max_entries > 1UL << 31) + goto free_htab; + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); htab->elem_size = sizeof(struct htab_elem) + @@ -505,10 +511,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) else htab->elem_size += round_up(htab->map.value_size, 8); - err = -E2BIG; - /* prevent zero size kmalloc and check for u32 overflow */ - if (htab->n_buckets == 0 || - htab->n_buckets > U32_MAX / sizeof(struct bucket)) + /* check for u32 overflow */ + if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; err = -ENOMEM; -- GitLab From f06899582ccee09bd85d0696290e3eaca9aa042d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:37 +0100 Subject: [PATCH 0946/2290] bpf: Fix stackmap overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7a4b21250bf79eef26543d35bd390448646c536b ] The stackmap code relies on roundup_pow_of_two() to compute the number of hash buckets, and contains an overflow check by checking if the resulting value is 0. However, on 32-bit arches, the roundup code itself can overflow by doing a 32-bit left-shift of an unsigned long value, which is undefined behaviour, so it is not guaranteed to truncate neatly. This was triggered by syzbot on the DEVMAP_HASH type, which contains the same check, copied from the hashtab code. The commit in the fixes tag actually attempted to fix this, but the fix did not account for the UB, so the fix only works on CPUs where an overflow does result in a neat truncation to zero, which is not guaranteed. Checking the value before rounding does not have this problem. Fixes: 6183f4d3a0a2 ("bpf: Check for integer overflow when using roundup_pow_of_two()") Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Bui Quang Minh Message-ID: <20240307120340.99577-4-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/stackmap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index f86db3cf72123..f0fd936cef319 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -94,11 +94,14 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); - /* hash table size must be power of 2 */ - n_buckets = roundup_pow_of_two(attr->max_entries); - if (!n_buckets) + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + if (attr->max_entries > 1UL << 31) return ERR_PTR(-E2BIG); + n_buckets = roundup_pow_of_two(attr->max_entries); + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) -- GitLab From 586e19c88a0cb58b6ff45ae085b3dd200d862153 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:35 +0800 Subject: [PATCH 0947/2290] iommu/vt-d: Retrieve IOMMU perfmon capability information [ Upstream commit a6a5006dad572a53b5df3f47e1471d207ae9ba49 ] The performance monitoring infrastructure, perfmon, is to support collection of information about key events occurring during operation of the remapping hardware, to aid performance tuning and debug. Each remapping hardware unit has capability registers that indicate support for performance monitoring features and enumerate the capabilities. Add alloc_iommu_pmu() to retrieve IOMMU perfmon capability information for each iommu unit. The information is stored in the iommu->pmu data structure. Capability registers are read-only, so it's safe to prefetch and store them in the pmu structure. This could avoid unnecessary VMEXIT when this code is running in the virtualization environment. Add free_iommu_pmu() to free the saved capability information when freeing the iommu unit. Add a kernel config option for the IOMMU perfmon feature. Unless a user explicitly uses the perf tool to monitor the IOMMU perfmon event, there isn't any impact for the existing IOMMU. Enable it by default. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-3-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Stable-dep-of: 70bad345e622 ("iommu: Fix compilation without CONFIG_IOMMU_INTEL") Signed-off-by: Sasha Levin --- drivers/iommu/intel/Kconfig | 11 +++ drivers/iommu/intel/Makefile | 1 + drivers/iommu/intel/dmar.c | 7 ++ drivers/iommu/intel/iommu.h | 43 ++++++++- drivers/iommu/intel/perfmon.c | 172 ++++++++++++++++++++++++++++++++++ drivers/iommu/intel/perfmon.h | 40 ++++++++ 6 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/intel/perfmon.c create mode 100644 drivers/iommu/intel/perfmon.h diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index b7dff5092fd21..12e1e90fdae13 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -96,4 +96,15 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON passing intel_iommu=sm_on to the kernel. If not sure, please use the default value. +config INTEL_IOMMU_PERF_EVENTS + def_bool y + bool "Intel IOMMU performance events" + depends on INTEL_IOMMU && PERF_EVENTS + help + Selecting this option will enable the performance monitoring + infrastructure in the Intel IOMMU. It collects information about + key events occurring during operation of the remapping hardware, + to aid performance tuning and debug. These are available on modern + processors which support Intel VT-d 4.0 and later. + endif # INTEL_IOMMU diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fa0dae16441cb..7af3b8a4f2a00 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 418af1db0192d..4759f79ad7b94 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -34,6 +34,7 @@ #include "../irq_remapping.h" #include "perf.h" #include "trace.h" +#include "perfmon.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -1104,6 +1105,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (sts & DMA_GSTS_QIES) iommu->gcmd |= DMA_GCMD_QIE; + if (alloc_iommu_pmu(iommu)) + pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id); + raw_spin_lock_init(&iommu->register_lock); /* @@ -1131,6 +1135,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err_sysfs: iommu_device_sysfs_remove(&iommu->iommu); err_unmap: + free_iommu_pmu(iommu); unmap_iommu(iommu); error_free_seq_id: ida_free(&dmar_seq_ids, iommu->seq_id); @@ -1146,6 +1151,8 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_sysfs_remove(&iommu->iommu); } + free_iommu_pmu(iommu); + if (iommu->irq) { if (iommu->pr_irq) { free_irq(iommu->pr_irq, iommu); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index c99cb715bd9a2..c1348bedab3b3 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -125,6 +125,11 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_PERFCAP_REG 0x300 +#define DMAR_PERFCFGOFF_REG 0x310 +#define DMAR_PERFOVFOFF_REG 0x318 +#define DMAR_PERFCNTROFF_REG 0x31c +#define DMAR_PERFEVNTCAP_REG 0x380 #define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ #define DMAR_VCMD_REG 0xe00 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ @@ -148,6 +153,7 @@ */ #define cap_esrtps(c) (((c) >> 63) & 1) #define cap_esirtps(c) (((c) >> 62) & 1) +#define cap_ecmds(c) (((c) >> 61) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) @@ -179,7 +185,8 @@ * Extended Capability Register */ -#define ecap_rps(e) (((e) >> 49) & 0x1) +#define ecap_pms(e) (((e) >> 51) & 0x1) +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) @@ -210,6 +217,22 @@ #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) #define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ +/* + * Decoding Perf Capability Register + */ +#define pcap_num_cntr(p) ((p) & 0xffff) +#define pcap_cntr_width(p) (((p) >> 16) & 0x7f) +#define pcap_num_event_group(p) (((p) >> 24) & 0x1f) +#define pcap_filters_mask(p) (((p) >> 32) & 0x1f) +#define pcap_interrupt(p) (((p) >> 50) & 0x1) +/* The counter stride is calculated as 2 ^ (x+10) bytes */ +#define pcap_cntr_stride(p) (1ULL << ((((p) >> 52) & 0x7) + 10)) + +/* + * Decoding Perf Event Capability Register + */ +#define pecap_es(p) ((p) & 0xfffffff) + /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ @@ -561,6 +584,22 @@ struct dmar_domain { iommu core */ }; +struct iommu_pmu { + struct intel_iommu *iommu; + u32 num_cntr; /* Number of counters */ + u32 num_eg; /* Number of event group */ + u32 cntr_width; /* Counter width */ + u32 cntr_stride; /* Counter Stride */ + u32 filter; /* Bitmask of filter support */ + void __iomem *base; /* the PerfMon base address */ + void __iomem *cfg_reg; /* counter configuration base address */ + void __iomem *cntr_reg; /* counter 0 address*/ + void __iomem *overflow; /* overflow status register */ + + u64 *evcap; /* Indicates all supported events */ + u32 **cntr_evcap; /* Supported events of each counter. */ +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -608,6 +647,8 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; void *perf_statistic; + + struct iommu_pmu *pmu; }; /* PCI domain-device relationship */ diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c new file mode 100644 index 0000000000000..db5791a544551 --- /dev/null +++ b/drivers/iommu/intel/perfmon.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support Intel IOMMU PerfMon + * Copyright(c) 2023 Intel Corporation. + */ +#define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include "iommu.h" +#include "perfmon.h" + +static inline void __iomem * +get_perf_reg_address(struct intel_iommu *iommu, u32 offset) +{ + u32 off = dmar_readl(iommu->reg + offset); + + return iommu->reg + off; +} + +int alloc_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu; + int i, j, ret; + u64 perfcap; + u32 cap; + + if (!ecap_pms(iommu->ecap)) + return 0; + + /* The IOMMU PMU requires the ECMD support as well */ + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); + /* The performance monitoring is not supported. */ + if (!perfcap) + return -ENODEV; + + /* Sanity check for the number of the counters and event groups */ + if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) + return -ENODEV; + + /* The interrupt on overflow is required */ + if (!pcap_interrupt(perfcap)) + return -ENODEV; + + iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); + if (!iommu_pmu) + return -ENOMEM; + + iommu_pmu->num_cntr = pcap_num_cntr(perfcap); + iommu_pmu->cntr_width = pcap_cntr_width(perfcap); + iommu_pmu->filter = pcap_filters_mask(perfcap); + iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); + iommu_pmu->num_eg = pcap_num_event_group(perfcap); + + iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL); + if (!iommu_pmu->evcap) { + ret = -ENOMEM; + goto free_pmu; + } + + /* Parse event group capabilities */ + for (i = 0; i < iommu_pmu->num_eg; i++) { + u64 pcap; + + pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + + i * IOMMU_PMU_CAP_REGS_STEP); + iommu_pmu->evcap[i] = pecap_es(pcap); + } + + iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap) { + ret = -ENOMEM; + goto free_pmu_evcap; + } + for (i = 0; i < iommu_pmu->num_cntr; i++) { + iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap[i]) { + ret = -ENOMEM; + goto free_pmu_cntr_evcap; + } + /* + * Set to the global capabilities, will adjust according + * to per-counter capabilities later. + */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; + } + + iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); + iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); + iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); + + /* + * Check per-counter capabilities. All counters should have the + * same capabilities on Interrupt on Overflow Support and Counter + * Width. + */ + for (i = 0; i < iommu_pmu->num_cntr; i++) { + cap = dmar_readl(iommu_pmu->cfg_reg + + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTRCAP_OFFSET); + if (!iommu_cntrcap_pcc(cap)) + continue; + + /* + * It's possible that some counters have a different + * capability because of e.g., HW bug. Check the corner + * case here and simply drop those counters. + */ + if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || + !iommu_cntrcap_ios(cap)) { + iommu_pmu->num_cntr = i; + pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n", + iommu_pmu->num_cntr); + } + + /* Clear the pre-defined events group */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = 0; + + /* Override with per-counter event capabilities */ + for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { + cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTREVCAP_OFFSET + + (j * IOMMU_PMU_OFF_REGS_STEP)); + iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); + /* + * Some events may only be supported by a specific counter. + * Track them in the evcap as well. + */ + iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); + } + } + + iommu_pmu->iommu = iommu; + iommu->pmu = iommu_pmu; + + return 0; + +free_pmu_cntr_evcap: + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); +free_pmu_evcap: + kfree(iommu_pmu->evcap); +free_pmu: + kfree(iommu_pmu); + + return ret; +} + +void free_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + if (iommu_pmu->evcap) { + int i; + + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); + } + kfree(iommu_pmu->evcap); + kfree(iommu_pmu); + iommu->pmu = NULL; +} diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h new file mode 100644 index 0000000000000..4b0d9c1fea6ff --- /dev/null +++ b/drivers/iommu/intel/perfmon.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * PERFCFGOFF_REG, PERFFRZOFF_REG + * PERFOVFOFF_REG, PERFCNTROFF_REG + */ +#define IOMMU_PMU_NUM_OFF_REGS 4 +#define IOMMU_PMU_OFF_REGS_STEP 4 + +#define IOMMU_PMU_CFG_OFFSET 0x100 +#define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 +#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 +#define IOMMU_PMU_CFG_SIZE 0x8 +#define IOMMU_PMU_CFG_FILTERS_OFFSET 0x4 + +#define IOMMU_PMU_CAP_REGS_STEP 8 + +#define iommu_cntrcap_pcc(p) ((p) & 0x1) +#define iommu_cntrcap_cw(p) (((p) >> 8) & 0xff) +#define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) +#define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) + +#define iommu_event_select(p) ((p) & 0xfffffff) +#define iommu_event_group(p) (((p) >> 28) & 0xf) + +#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS +int alloc_iommu_pmu(struct intel_iommu *iommu); +void free_iommu_pmu(struct intel_iommu *iommu); +#else +static inline int +alloc_iommu_pmu(struct intel_iommu *iommu) +{ + return 0; +} + +static inline void +free_iommu_pmu(struct intel_iommu *iommu) +{ +} +#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */ -- GitLab From 8c91a4bfc7f8f42e228ba91eb37d0d33c3450311 Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Thu, 7 Mar 2024 20:44:19 +0100 Subject: [PATCH 0948/2290] iommu: Fix compilation without CONFIG_IOMMU_INTEL [ Upstream commit 70bad345e622c23bb530016925c936ab04a646ac ] When the kernel is comiled with CONFIG_IRQ_REMAP=y but without CONFIG_IOMMU_INTEL compilation fails since commit def054b01a8678 with an undefined reference to device_rbtree_find(). This patch makes sure that intel specific code is only compiled with CONFIG_IOMMU_INTEL=y. Signed-off-by: Bert Karwatzki Fixes: 80a9b50c0b9e ("iommu/vt-d: Improve ITE fault handling if target device isn't present") Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240307194419.15801-1-spasswolf@web.de Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/Kconfig | 2 +- drivers/iommu/intel/Makefile | 2 ++ drivers/iommu/irq_remapping.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dc5f7a156ff5e..dc19e7fb07cfe 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -192,7 +192,7 @@ source "drivers/iommu/intel/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE + select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index 7af3b8a4f2a00..29d26a4371327 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -5,5 +5,7 @@ obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o +ifdef CONFIG_INTEL_IOMMU obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +endif obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 83314b9d8f38b..ee59647c20501 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -99,7 +99,8 @@ int __init irq_remapping_prepare(void) if (disable_irq_remap) return -ENOSYS; - if (intel_irq_remap_ops.prepare() == 0) + if (IS_ENABLED(CONFIG_INTEL_IOMMU) && + intel_irq_remap_ops.prepare() == 0) remap_ops = &intel_irq_remap_ops; else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) -- GitLab From 66e74f2f74a6930a93ef9fa581bf3ff77ba41692 Mon Sep 17 00:00:00 2001 From: Shiming Cheng Date: Thu, 7 Mar 2024 18:01:57 +0800 Subject: [PATCH 0949/2290] ipv6: fib6_rules: flush route cache when rule is changed [ Upstream commit c4386ab4f6c600f75fdfd21143f89bac3e625d0d ] When rule policy is changed, ipv6 socket cache is not refreshed. The sock's skb still uses a outdated route cache and was sent to a wrong interface. To avoid this error we should update fib node's version when rule is changed. Then skb's route will be reroute checked as route cache version is already different with fib node version. The route cache is refreshed to match the latest rule. Fixes: 101367c2f8c4 ("[IPV6]: Policy Routing Rules") Signed-off-by: Shiming Cheng Signed-off-by: Lena Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/fib6_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7c20038330104..be52b18e08a6b 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -449,6 +449,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } +static void fib6_rule_flush_cache(struct fib_rules_ops *ops) +{ + rt_genid_bump_ipv6(ops->fro_net); +} + static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), @@ -461,6 +466,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, + .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, -- GitLab From 60044ab84836359534bd7153b92e9c1584140e4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 10:07:16 +0000 Subject: [PATCH 0950/2290] net: ip_tunnel: make sure to pull inner header in ip_tunnel_rcv() [ Upstream commit b0ec2abf98267f14d032102551581c833b0659d3 ] Apply the same fix than ones found in : 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. syzbot reported: BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __ipgre_rcv+0x9bc/0xbc0 net/ipv4/ip_gre.c:389 ipgre_rcv net/ipv4/ip_gre.c:411 [inline] gre_rcv+0x423/0x19f0 net/ipv4/ip_gre.c:447 gre_rcv+0x2a4/0x390 net/ipv4/gre_demux.c:163 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 netif_receive_skb_internal net/core/dev.c:5734 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5793 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1556 tun_get_user+0x53b9/0x66e0 drivers/net/tun.c:2009 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590 alloc_pages_mpol+0x62b/0x9d0 mm/mempolicy.c:2133 alloc_pages+0x1be/0x1e0 mm/mempolicy.c:2204 skb_page_frag_refill+0x2bf/0x7c0 net/core/sock.c:2909 tun_build_skb drivers/net/tun.c:1686 [inline] tun_get_user+0xe0a/0x66e0 drivers/net/tun.c:1826 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_tunnel.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 328f9068c6a43..3445e576b05bc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -364,7 +364,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, bool log_ecn_error) { const struct iphdr *iph = ip_hdr(skb); - int err; + int nh, err; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -390,8 +390,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + iph = (struct iphdr *)(skb->head + nh); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) -- GitLab From 2a2ff709511617de9c6c072eeee82bcbbdfecaf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20L=27h=C3=B4pital?= Date: Thu, 7 Mar 2024 12:19:06 +0100 Subject: [PATCH 0951/2290] net: phy: fix phy_get_internal_delay accessing an empty array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4469c0c5b14a0919f5965c7ceac96b523eb57b79 ] The phy_get_internal_delay function could try to access to an empty array in the case that the driver is calling phy_get_internal_delay without defining delay_values and rx-internal-delay-ps or tx-internal-delay-ps is defined to 0 in the device-tree. This will lead to "unable to handle kernel NULL pointer dereference at virtual address 0". To avoid this kernel oops, the test should be delay >= 0. As there is already delay < 0 test just before, the test could only be size == 0. Fixes: 92252eec913b ("net: phy: Add a helper to return the index for of the internal delay") Co-developed-by: Enguerrand de Ribaucourt Signed-off-by: Enguerrand de Ribaucourt Signed-off-by: Kévin L'hôpital Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 944f76e6fc8eb..45b07004669d6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2893,7 +2893,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, if (delay < 0) return delay; - if (delay && size == 0) + if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { -- GitLab From 19af2ce84cd9a4d0c760fa913716393520ef8be4 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 7 Mar 2024 09:01:08 +0800 Subject: [PATCH 0952/2290] net: hns3: fix wrong judgment condition issue [ Upstream commit 07a1d6dc90baedcf5d713e2b003b9e387130ee30 ] In hns3_dcbnl_ieee_delapp, should check ieee_delapp not ieee_setapp. This path fix the wrong judgment. Fixes: 0ba22bcb222d ("net: hns3: add support config dscp map to tc") Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c index 3b6dbf158b98d..f72dc0cee30e5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c @@ -76,7 +76,7 @@ static int hns3_dcbnl_ieee_delapp(struct net_device *ndev, struct dcb_app *app) if (hns3_nic_resetting(ndev)) return -EBUSY; - if (h->kinfo.dcb_ops->ieee_setapp) + if (h->kinfo.dcb_ops->ieee_delapp) return h->kinfo.dcb_ops->ieee_delapp(h, app); return -EOPNOTSUPP; -- GitLab From b3cf70472a600bcb2efe24906bc9bc6014d4c6f6 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 7 Mar 2024 09:01:11 +0800 Subject: [PATCH 0953/2290] net: hns3: fix kernel crash when 1588 is received on HIP08 devices [ Upstream commit 0fbcf2366ba9888cf02eda23e35fde7f7fcc07c3 ] The HIP08 devices does not register the ptp devices, so the hdev->ptp is NULL, but the hardware can receive 1588 messages, and set the HNS3_RXD_TS_VLD_B bit, so, if match this case, the access of hdev->ptp->flags will cause a kernel crash: [ 5888.946472] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000018 [ 5888.946475] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000018 ... [ 5889.266118] pc : hclge_ptp_get_rx_hwts+0x40/0x170 [hclge] [ 5889.272612] lr : hclge_ptp_get_rx_hwts+0x34/0x170 [hclge] [ 5889.279101] sp : ffff800012c3bc50 [ 5889.283516] x29: ffff800012c3bc50 x28: ffff2040002be040 [ 5889.289927] x27: ffff800009116484 x26: 0000000080007500 [ 5889.296333] x25: 0000000000000000 x24: ffff204001c6f000 [ 5889.302738] x23: ffff204144f53c00 x22: 0000000000000000 [ 5889.309134] x21: 0000000000000000 x20: ffff204004220080 [ 5889.315520] x19: ffff204144f53c00 x18: 0000000000000000 [ 5889.321897] x17: 0000000000000000 x16: 0000000000000000 [ 5889.328263] x15: 0000004000140ec8 x14: 0000000000000000 [ 5889.334617] x13: 0000000000000000 x12: 00000000010011df [ 5889.340965] x11: bbfeff4d22000000 x10: 0000000000000000 [ 5889.347303] x9 : ffff800009402124 x8 : 0200f78811dfbb4d [ 5889.353637] x7 : 2200000000191b01 x6 : ffff208002a7d480 [ 5889.359959] x5 : 0000000000000000 x4 : 0000000000000000 [ 5889.366271] x3 : 0000000000000000 x2 : 0000000000000000 [ 5889.372567] x1 : 0000000000000000 x0 : ffff20400095c080 [ 5889.378857] Call trace: [ 5889.382285] hclge_ptp_get_rx_hwts+0x40/0x170 [hclge] [ 5889.388304] hns3_handle_bdinfo+0x324/0x410 [hns3] [ 5889.394055] hns3_handle_rx_bd+0x60/0x150 [hns3] [ 5889.399624] hns3_clean_rx_ring+0x84/0x170 [hns3] [ 5889.405270] hns3_nic_common_poll+0xa8/0x220 [hns3] [ 5889.411084] napi_poll+0xcc/0x264 [ 5889.415329] net_rx_action+0xd4/0x21c [ 5889.419911] __do_softirq+0x130/0x358 [ 5889.424484] irq_exit+0x134/0x154 [ 5889.428700] __handle_domain_irq+0x88/0xf0 [ 5889.433684] gic_handle_irq+0x78/0x2c0 [ 5889.438319] el1_irq+0xb8/0x140 [ 5889.442354] arch_cpu_idle+0x18/0x40 [ 5889.446816] default_idle_call+0x5c/0x1c0 [ 5889.451714] cpuidle_idle_call+0x174/0x1b0 [ 5889.456692] do_idle+0xc8/0x160 [ 5889.460717] cpu_startup_entry+0x30/0xfc [ 5889.465523] secondary_start_kernel+0x158/0x1ec [ 5889.470936] Code: 97ffab78 f9411c14 91408294 f9457284 (f9400c80) [ 5889.477950] SMP: stopping secondary CPUs [ 5890.514626] SMP: failed to stop secondary CPUs 0-69,71-95 [ 5890.522951] Starting crashdump kernel... Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index a40b1583f1149..0f06f95b09bc2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -120,7 +120,7 @@ void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb, u64 ns = nsec; u32 sec_h; - if (!test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags)) + if (!hdev->ptp || !test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags)) return; /* Since the BD does not have enough space for the higher 16 bits of -- GitLab From 8ec278830c1a13776f49f70041ad9cd1146a2af0 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Thu, 7 Mar 2024 09:01:14 +0800 Subject: [PATCH 0954/2290] net: hns3: fix port duplex configure error in IMP reset [ Upstream commit 11d80f79dd9f871a52feba4bf24b5ac39f448eb7 ] Currently, the mac port is fixed to configured as full dplex mode in hclge_mac_init() when driver initialization or reset restore. Users may change the mode to half duplex with ethtool, so it may cause the user configuration dropped after reset. To fix it, don't change the duplex mode when resetting. Fixes: 2d03eacc0b7e ("net: hns3: Only update mac configuation when necessary") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 48b0cb5ec5d29..27037ce795902 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2990,7 +2990,10 @@ static int hclge_mac_init(struct hclge_dev *hdev) int ret; hdev->support_sfp_query = true; - hdev->hw.mac.duplex = HCLGE_MAC_FULL; + + if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + hdev->hw.mac.duplex = HCLGE_MAC_FULL; + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, hdev->hw.mac.duplex, hdev->hw.mac.lane_num); if (ret) -- GitLab From f30e6322bc518ba6a9428bb3851223f09870001e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Jul 2023 12:06:32 -0700 Subject: [PATCH 0955/2290] Bluetooth: MGMT: Fix always using HCI_MAX_AD_LENGTH [ Upstream commit 112b5090c21905531314fee41f691f0317bbf4f6 ] HCI_MAX_AD_LENGTH shall only be used if the controller doesn't support extended advertising, otherwise HCI_MAX_EXT_AD_LENGTH shall be used instead. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2ab3e8d67fc1 ("Bluetooth: Fix eir name length") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_event.c | 12 +++++++----- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6bc6de5345261..53155cb703b5d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1710,6 +1710,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) +/* Maximum advertising length */ +#define max_adv_len(dev) \ + (ext_adv_capable(dev) ? HCI_MAX_EXT_AD_LENGTH : HCI_MAX_AD_LENGTH) + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: * * C24: Mandatory if the LE Controller supports Connection State and either diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6b746ab9f6d21..b150dee88f35c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1761,7 +1761,7 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct discovery_state *d = &hdev->discovery; - if (len > HCI_MAX_AD_LENGTH) + if (len > max_adv_len(hdev)) return; bacpy(&d->last_adv_addr, bdaddr); @@ -6240,8 +6240,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, return; } - if (!ext_adv && len > HCI_MAX_AD_LENGTH) { - bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes"); + if (len > max_adv_len(hdev)) { + bt_dev_err_ratelimited(hdev, + "adv larger than maximum supported"); return; } @@ -6306,7 +6307,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, type); - if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) { + if (!ext_adv && conn && type == LE_ADV_IND && + len <= max_adv_len(hdev)) { /* Store report for later inclusion by * mgmt_device_connected */ @@ -6447,7 +6449,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, info->length + 1)) break; - if (info->length <= HCI_MAX_AD_LENGTH) { + if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, NULL, 0, rssi, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4a35535f56607..1486fb9bb78f7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8436,8 +8436,8 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, supported_flags = get_supported_adv_flags(hdev); rp->supported_flags = cpu_to_le32(supported_flags); - rp->max_adv_data_len = HCI_MAX_AD_LENGTH; - rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; + rp->max_adv_data_len = max_adv_len(hdev); + rp->max_scan_rsp_len = max_adv_len(hdev); rp->max_instances = hdev->le_num_of_adv_sets; rp->num_instances = hdev->adv_instance_cnt; @@ -8473,7 +8473,7 @@ static u8 calculate_name_len(struct hci_dev *hdev) static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, bool is_adv_data) { - u8 max_len = HCI_MAX_AD_LENGTH; + u8 max_len = max_adv_len(hdev); if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | -- GitLab From 99f30e12e588f9982a6eb1916e53510bff25b3b8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 18 Aug 2023 14:19:27 -0700 Subject: [PATCH 0956/2290] Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH [ Upstream commit db08722fc7d46168fe31d9b8a7b29229dd959f9f ] There a few instances still using HCI_MAX_AD_LENGTH instead of using max_adv_len which takes care of detecting what is the actual maximum length depending on if the controller supports EA or not. Fixes: 112b5090c219 ("Bluetooth: MGMT: Fix always using HCI_MAX_AD_LENGTH") Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 2ab3e8d67fc1 ("Bluetooth: Fix eir name length") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 6 +++--- net/bluetooth/eir.c | 2 +- net/bluetooth/mgmt.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 53155cb703b5d..c50a41f1782a4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -81,7 +81,7 @@ struct discovery_state { u8 last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; - u8 last_adv_data[HCI_MAX_AD_LENGTH]; + u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; @@ -293,7 +293,7 @@ struct adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[HCI_MAX_AD_LENGTH]; + __u8 value[HCI_MAX_EXT_AD_LENGTH]; }; struct adv_rssi_thresholds { @@ -727,7 +727,7 @@ struct hci_conn { __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; - __u8 le_adv_data[HCI_MAX_AD_LENGTH]; + __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 le_adv_data_len; __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; __u8 le_per_adv_data_len; diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 8a85f6cdfbc16..9214189279e80 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -33,7 +33,7 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) size_t complete_len; /* no space left for name (+ NULL + type + len) */ - if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) return ad_len; /* use complete name if present and fits */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1486fb9bb78f7..21c0924787e22 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5378,9 +5378,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_AD_LENGTH || - length > HCI_MAX_AD_LENGTH || - (offset + length) > HCI_MAX_AD_LENGTH) + if (offset >= HCI_MAX_EXT_AD_LENGTH || + length > HCI_MAX_EXT_AD_LENGTH || + (offset + length) > HCI_MAX_EXT_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); -- GitLab From 262a77d85e8b448ac83eb4e882e0f63056b19f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Thu, 7 Mar 2024 17:42:05 +0100 Subject: [PATCH 0957/2290] Bluetooth: Fix eir name length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2ab3e8d67fc1d4a7638b769cf83023ec209fc0a9 ] According to Section 1.2 of Core Specification Supplement Part A the complete or short name strings are defined as utf8s, which should not include the trailing NULL for variable length array as defined in Core Specification Vol1 Part E Section 2.9.3. Removing the trailing NULL allows PTS to retrieve the random address based on device name, e.g. for SM/PER/KDU/BV-02-C, SM/PER/KDU/BV-08-C or GAP/BROB/BCST/BV-03-C. Fixes: f61851f64b17 ("Bluetooth: Fix append max 11 bytes of name to scan rsp data") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/eir.c | 29 +++++++---------------------- net/bluetooth/mgmt.c | 2 +- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 9214189279e80..1bc51e2b05a34 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -13,48 +13,33 @@ #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) -{ - u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; - - /* If data is already NULL terminated just pass it directly */ - if (data[data_len - 1] == '\0') - return eir_append_data(eir, eir_len, type, data, data_len); - - memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH); - name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; - - return eir_append_data(eir, eir_len, type, name, sizeof(name)); -} - u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { size_t short_len; size_t complete_len; - /* no space left for name (+ NULL + type + len) */ - if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + /* no space left for name (+ type + len) */ + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 2) return ad_len; /* use complete name if present and fits */ complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len + 1); + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); /* use short name if present */ short_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); if (short_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->short_name, - short_len == HCI_MAX_SHORT_NAME_LENGTH ? - short_len : short_len + 1); + short_len); /* use shortened full name if present, we already know that name * is longer then HCI_MAX_SHORT_NAME_LENGTH */ if (complete_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 21c0924787e22..716f6dc4934b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8465,7 +8465,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, static u8 calculate_name_len(struct hci_dev *hdev) { - u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */ return eir_append_local_name(hdev, buf, 0); } -- GitLab From 877cce250a7b55588318bedc4b0271fddaf28999 Mon Sep 17 00:00:00 2001 From: Tim Pambor Date: Tue, 5 Mar 2024 12:06:08 +0100 Subject: [PATCH 0958/2290] net: phy: dp83822: Fix RGMII TX delay configuration [ Upstream commit c8a5c731fd1223090af57da33838c671a7fc6a78 ] The logic for enabling the TX clock shift is inverse of enabling the RX clock shift. The TX clock shift is disabled when DP83822_TX_CLK_SHIFT is set. Correct the current behavior and always write the delay configuration to ensure consistent delay settings regardless of bootloader configuration. Reference: https://www.ti.com/lit/ds/symlink/dp83822i.pdf p. 69 Fixes: 8095295292b5 ("net: phy: DP83822: Add setting the fixed internal delay") Signed-off-by: Tim Pambor Link: https://lore.kernel.org/r/20240305110608.104072-1-tp@osasysteme.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/dp83822.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 267e6fd3d4448..57411ee1d8374 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -380,7 +380,7 @@ static int dp83822_config_init(struct phy_device *phydev) { struct dp83822_private *dp83822 = phydev->priv; struct device *dev = &phydev->mdio.dev; - int rgmii_delay; + int rgmii_delay = 0; s32 rx_int_delay; s32 tx_int_delay; int err = 0; @@ -390,30 +390,33 @@ static int dp83822_config_init(struct phy_device *phydev) rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, true); - if (rx_int_delay <= 0) - rgmii_delay = 0; - else - rgmii_delay = DP83822_RX_CLK_SHIFT; + /* Set DP83822_RX_CLK_SHIFT to enable rx clk internal delay */ + if (rx_int_delay > 0) + rgmii_delay |= DP83822_RX_CLK_SHIFT; tx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, false); + + /* Set DP83822_TX_CLK_SHIFT to disable tx clk internal delay */ if (tx_int_delay <= 0) - rgmii_delay &= ~DP83822_TX_CLK_SHIFT; - else rgmii_delay |= DP83822_TX_CLK_SHIFT; - if (rgmii_delay) { - err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, rgmii_delay); - if (err) - return err; - } + err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay); + if (err) + return err; + + err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); - phy_set_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + if (err) + return err; } else { - phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, + MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); + + if (err) + return err; } if (dp83822->fx_enabled) { -- GitLab From 20fd74fa884f30515b06f725e9207b064e1ea97b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Mar 2024 16:48:28 +0530 Subject: [PATCH 0959/2290] OPP: debugfs: Fix warning around icc_get_name() [ Upstream commit 28330ceb953e39880ea77da4895bb902a1244860 ] If the kernel isn't built with interconnect support, icc_get_name() returns NULL and we get following warning: drivers/opp/debugfs.c: In function 'bw_name_read': drivers/opp/debugfs.c:43:42: error: '%.62s' directive argument is null [-Werror=format-overflow=] i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); Fix it. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402141313.81ltVF5g-lkp@intel.com/ Fixes: 0430b1d5704b0 ("opp: Expose bandwidth information via debugfs") Signed-off-by: Viresh Kumar Reviewed-by: Dhruva Gole Signed-off-by: Sasha Levin --- drivers/opp/debugfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 2c7fb683441ef..de81bbf4be100 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -37,10 +37,12 @@ static ssize_t bw_name_read(struct file *fp, char __user *userbuf, size_t count, loff_t *ppos) { struct icc_path *path = fp->private_data; + const char *name = icc_get_name(path); char buf[64]; - int i; + int i = 0; - i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); + if (name) + i = scnprintf(buf, sizeof(buf), "%.62s\n", name); return simple_read_from_buffer(userbuf, count, ppos, buf, i); } -- GitLab From dcdcf9a71730b35ba0096be166149e160fa743d0 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:49 +0000 Subject: [PATCH 0960/2290] tcp: fix incorrect parameter validation in the do_tcp_getsockopt() function [ Upstream commit 716edc9706deb3bb2ff56e2eeb83559cea8f22db ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Reviewed-by: Jason Xing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 86e7695d91adf..5a165e29f7be4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4102,11 +4102,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; -- GitLab From 6ccbd0227fb81b8a22b77795fd6ab2ff0d24db7a Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0961/2290] ipmr: fix incorrect parameter validation in the ip_mroute_getsockopt() function [ Upstream commit 5c3be3e0eb44b7f978bb6cbb20ad956adb93f736 ] The 'olr' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'olr' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ipmr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d5421c38c2aae..3ed9ed2bffd29 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1581,9 +1581,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; + + olr = min_t(unsigned int, olr, sizeof(int)); + if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) -- GitLab From cfea1c9ad406f39a1c416ec981107bf5ce09707c Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0962/2290] l2tp: fix incorrect parameter validation in the pppol2tp_getsockopt() function [ Upstream commit 955e9876ba4ee26eeaab1b13517f5b2c88e73d55 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reviewed-by: Tom Parkin Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_ppp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f011af6601c9c..6146e4e67bbb5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1356,11 +1356,11 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + err = -ENOTCONN; if (!sk->sk_user_data) goto end; -- GitLab From 6bb3c0473863f3c4f6430799fa45423a5b2ed75e Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0963/2290] udp: fix incorrect parameter validation in the udp_lib_getsockopt() function [ Upstream commit 4bb3ba7b74fceec6f558745b25a43c6521cf5506 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Willem de Bruijn Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 87d759bab0012..7856b7a3e0ee9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2790,11 +2790,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case UDP_CORK: val = udp_test_bit(CORK, sk); -- GitLab From 440e278cb53b8dd6627c32e84950350083c39d35 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0964/2290] net: kcm: fix incorrect parameter validation in the kcm_getsockopt) function [ Upstream commit 3ed5f415133f9b7518fbe55ba9ae9a3f5e700929 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/kcm/kcmsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 65845c59c0655..7d37bf4334d26 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1274,10 +1274,11 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case KCM_RECV_DISABLE: val = kcm->rx_disabled; -- GitLab From b7c2b7eae2864b84644e535635fd88b0bcce0065 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: [PATCH 0965/2290] net/x25: fix incorrect parameter validation in the x25_getsockopt() function [ Upstream commit d6eb8de2015f0c24822e47356f839167ebde2945 ] The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/x25/af_x25.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5c7ad301d742e..5a8b2ea56564e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -460,12 +460,12 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) goto out; - len = min_t(unsigned int, len, sizeof(int)); - rc = -EINVAL; if (len < 0) goto out; + len = min_t(unsigned int, len, sizeof(int)); + rc = -EFAULT; if (put_user(len, optlen)) goto out; -- GitLab From c9b4e220dd18f79507803f38a55d53b483f6c9c3 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 8 Mar 2024 22:25:40 +0800 Subject: [PATCH 0966/2290] nfp: flower: handle acti_netdevs allocation failure [ Upstream commit 84e95149bd341705f0eca6a7fcb955c548805002 ] The kmalloc_array() in nfp_fl_lag_do_work() will return null, if the physical memory has run out. As a result, if we dereference the acti_netdevs, the null pointer dereference bugs will happen. This patch adds a check to judge whether allocation failure occurs. If it happens, the delayed work will be rescheduled and try again. Fixes: bb9a8d031140 ("nfp: flower: monitor and offload LAG groups") Signed-off-by: Duoming Zhou Reviewed-by: Louis Peens Link: https://lore.kernel.org/r/20240308142540.9674-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/flower/lag_conf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index e92860e20a24a..c6a2c302a8c8b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -308,6 +308,11 @@ static void nfp_fl_lag_do_work(struct work_struct *work) acti_netdevs = kmalloc_array(entry->slave_cnt, sizeof(*acti_netdevs), GFP_KERNEL); + if (!acti_netdevs) { + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } /* Include sanity check in the loop. It may be that a bond has * changed between processing the last notification and the -- GitLab From b605c3831fb47731a55a1e50e919c45c308f96e0 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 11 Mar 2024 12:27:22 +0000 Subject: [PATCH 0967/2290] bpf: hardcode BPF_PROG_PACK_SIZE to 2MB * num_possible_nodes() [ Upstream commit d6170e4aaf86424c24ce06e355b4573daa891b17 ] On some architectures like ARM64, PMD_SIZE can be really large in some configurations. Like with CONFIG_ARM64_64K_PAGES=y the PMD_SIZE is 512MB. Use 2MB * num_possible_nodes() as the size for allocations done through the prog pack allocator. On most architectures, PMD_SIZE will be equal to 2MB in case of 4KB pages and will be greater than 2MB for bigger page sizes. Fixes: ea2babac63d4 ("bpf: Simplify bpf_prog_pack_[size|mask]") Reported-by: "kernelci.org bot" Closes: https://lore.kernel.org/all/7e216c88-77ee-47b8-becc-a0f780868d3c@sirena.org.uk/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403092219.dhgcuz2G-lkp@intel.com/ Suggested-by: Song Liu Signed-off-by: Puranjay Mohan Message-ID: <20240311122722.86232-1-puranjay12@gmail.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 76bf1de261152..44abf88e1bb0d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -857,7 +857,12 @@ static LIST_HEAD(pack_list); * CONFIG_MMU=n. Use PAGE_SIZE in these cases. */ #ifdef PMD_SIZE -#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes()) +/* PMD_SIZE is really big for some archs. It doesn't make sense to + * reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to + * 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be + * greater than or equal to 2MB. + */ +#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes()) #else #define BPF_PROG_PACK_SIZE PAGE_SIZE #endif -- GitLab From 2b50b4f0d40dee8181148a39ca5bb5079a35777c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 11 Mar 2024 13:42:55 -0400 Subject: [PATCH 0968/2290] dm raid: fix false positive for requeue needed during reshape [ Upstream commit b25b8f4b8ecef0f48c05f0c3572daeabefe16526 ] An empty flush doesn't have a payload, so it should never be looked at when considering to possibly requeue a bio for the case when a reshape is in progress. Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") Reported-by: Patrick Plenefisch Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 4b7528dc2fd08..7fbce214e00f5 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3325,14 +3325,14 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct mddev *mddev = &rs->md; /* - * If we're reshaping to add disk(s)), ti->len and + * If we're reshaping to add disk(s), ti->len and * mddev->array_sectors will differ during the process * (ti->len > mddev->array_sectors), so we have to requeue * bios with addresses > mddev->array_sectors here or * there will occur accesses past EOD of the component * data images thus erroring the raid set. */ - if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; md_handle_request(mddev, bio); -- GitLab From ad10289f68f45649816cc68eb93f45fd5ec48a15 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 11 Mar 2024 15:06:39 +0100 Subject: [PATCH 0969/2290] dm: call the resume method on internal suspend [ Upstream commit 65e8fbde64520001abf1c8d0e573561b4746ef38 ] There is this reported crash when experimenting with the lvm2 testsuite. The list corruption is caused by the fact that the postsuspend and resume methods were not paired correctly; there were two consecutive calls to the origin_postsuspend function. The second call attempts to remove the "hash_list" entry from a list, while it was already removed by the first call. Fix __dm_internal_resume so that it calls the preresume and resume methods of the table's targets. If a preresume method of some target fails, we are in a tricky situation. We can't return an error because dm_internal_resume isn't supposed to return errors. We can't return success, because then the "resume" and "postsuspend" methods would not be paired correctly. So, we set the DMF_SUSPENDED flag and we fake normal suspend - it may confuse userspace tools, but it won't cause a kernel crash. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 8343 Comm: dmsetup Not tainted 6.8.0-rc6 #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:__list_del_entry_valid_or_report+0x77/0xc0 RSP: 0018:ffff8881b831bcc0 EFLAGS: 00010282 RAX: 000000000000004e RBX: ffff888143b6eb80 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff819053d0 RDI: 00000000ffffffff RBP: ffff8881b83a3400 R08: 00000000fffeffff R09: 0000000000000058 R10: 0000000000000000 R11: ffffffff81a24080 R12: 0000000000000001 R13: ffff88814538e000 R14: ffff888143bc6dc0 R15: ffffffffa02e4bb0 FS: 00000000f7c0f780(0000) GS:ffff8893f0a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000057fb5000 CR3: 0000000143474000 CR4: 00000000000006b0 Call Trace: ? die+0x2d/0x80 ? do_trap+0xeb/0xf0 ? __list_del_entry_valid_or_report+0x77/0xc0 ? do_error_trap+0x60/0x80 ? __list_del_entry_valid_or_report+0x77/0xc0 ? exc_invalid_op+0x49/0x60 ? __list_del_entry_valid_or_report+0x77/0xc0 ? asm_exc_invalid_op+0x16/0x20 ? table_deps+0x1b0/0x1b0 [dm_mod] ? __list_del_entry_valid_or_report+0x77/0xc0 origin_postsuspend+0x1a/0x50 [dm_snapshot] dm_table_postsuspend_targets+0x34/0x50 [dm_mod] dm_suspend+0xd8/0xf0 [dm_mod] dev_suspend+0x1f2/0x2f0 [dm_mod] ? table_deps+0x1b0/0x1b0 [dm_mod] ctl_ioctl+0x300/0x5f0 [dm_mod] dm_compat_ctl_ioctl+0x7/0x10 [dm_mod] __x64_compat_sys_ioctl+0x104/0x170 do_syscall_64+0x184/0x1b0 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0xf7e6aead ---[ end trace 0000000000000000 ]--- Fixes: ffcc39364160 ("dm: enhance internal suspend and resume interface") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0ec85d159bcde..29270f6f272f6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2897,6 +2897,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2905,12 +2908,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. + */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); -- GitLab From 50c0ad785a780c72a2fdaba10b38c645ffb4eae6 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 24 Oct 2023 08:07:38 +0000 Subject: [PATCH 0970/2290] drm/tegra: dsi: Add missing check for of_find_device_by_node [ Upstream commit afe6fcb9775882230cd29b529203eabd5d2a638d ] Add check for the return value of of_find_device_by_node() and return the error if it fails in order to avoid NULL pointer dereference. Fixes: e94236cde4d5 ("drm/tegra: dsi: Add ganged mode support") Signed-off-by: Chen Ni Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20231024080738.825553-1-nichen@iscas.ac.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index de1333dc0d867..0adce882f157b 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1534,9 +1534,11 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi) np = of_parse_phandle(dsi->dev->of_node, "nvidia,ganged-mode", 0); if (np) { struct platform_device *gangster = of_find_device_by_node(np); + of_node_put(np); + if (!gangster) + return -EPROBE_DEFER; dsi->slave = platform_get_drvdata(gangster); - of_node_put(np); if (!dsi->slave) { put_device(&gangster->dev); -- GitLab From f4a1a30f18373b05ec7fc5bd340a884b833f8d33 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Wed, 4 Oct 2023 22:10:55 +0800 Subject: [PATCH 0971/2290] drm/tegra: dpaux: Fix PM disable depth imbalance in tegra_dpaux_probe [ Upstream commit 0800880f4eb789b7d299db40f2e86e056bd33a4e ] The pm_runtime_enable function increases the power disable depth, which means that we must perform a matching decrement on the error handling path to maintain balance within the given context. Additionally, we need to address the same issue for pm_runtime_get_sync. We fix this by invoking pm_runtime_disable and pm_runtime_put_sync when error returns. Fixes: 82b81b3ec1a7 ("drm/tegra: dpaux: Implement runtime PM") Signed-off-by: Zhang Shurong Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/tencent_B13DB7F6C0023C46157250A524966F326A09@qq.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dpaux.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index d773ef4854188..b563988fb6848 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -524,7 +524,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev) if (err < 0) { dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n", dpaux->irq, err); - return err; + goto err_pm_disable; } disable_irq(dpaux->irq); @@ -544,7 +544,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev) */ err = tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_I2C); if (err < 0) - return err; + goto err_pm_disable; #ifdef CONFIG_GENERIC_PINCONF dpaux->desc.name = dev_name(&pdev->dev); @@ -557,7 +557,8 @@ static int tegra_dpaux_probe(struct platform_device *pdev) dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux); if (IS_ERR(dpaux->pinctrl)) { dev_err(&pdev->dev, "failed to register pincontrol\n"); - return PTR_ERR(dpaux->pinctrl); + err = PTR_ERR(dpaux->pinctrl); + goto err_pm_disable; } #endif /* enable and clear all interrupts */ @@ -573,10 +574,15 @@ static int tegra_dpaux_probe(struct platform_device *pdev) err = devm_of_dp_aux_populate_ep_devices(&dpaux->aux); if (err < 0) { dev_err(dpaux->dev, "failed to populate AUX bus: %d\n", err); - return err; + goto err_pm_disable; } return 0; + +err_pm_disable: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return err; } static int tegra_dpaux_remove(struct platform_device *pdev) -- GitLab From d3e8c2409a35b26bb4756819ace721ff7c8fd4a6 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Thu, 16 Sep 2021 18:56:40 +0800 Subject: [PATCH 0972/2290] drm/tegra: dsi: Make use of the helper function dev_err_probe() [ Upstream commit fc75e4fcbd1e4252a0481ebb23cd4516c127a8e2 ] When possible use dev_err_probe help to properly deal with the PROBE_DEFER error, the benefit is that DEFER issue will be logged in the devices_deferred debugfs file. And using dev_err_probe() can reduce code size, the error value gets printed. Signed-off-by: Cai Huoqing Signed-off-by: Thierry Reding Stable-dep-of: 830c1ded3563 ("drm/tegra: dsi: Fix some error handling paths in tegra_dsi_probe()") Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 0adce882f157b..6cbba2adb6e5a 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1591,28 +1591,24 @@ static int tegra_dsi_probe(struct platform_device *pdev) } dsi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(dsi->clk)) { - dev_err(&pdev->dev, "cannot get DSI clock\n"); - return PTR_ERR(dsi->clk); - } + if (IS_ERR(dsi->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), + "cannot get DSI clock\n"); dsi->clk_lp = devm_clk_get(&pdev->dev, "lp"); - if (IS_ERR(dsi->clk_lp)) { - dev_err(&pdev->dev, "cannot get low-power clock\n"); - return PTR_ERR(dsi->clk_lp); - } + if (IS_ERR(dsi->clk_lp)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), + "cannot get low-power clock\n"); dsi->clk_parent = devm_clk_get(&pdev->dev, "parent"); - if (IS_ERR(dsi->clk_parent)) { - dev_err(&pdev->dev, "cannot get parent clock\n"); - return PTR_ERR(dsi->clk_parent); - } + if (IS_ERR(dsi->clk_parent)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), + "cannot get parent clock\n"); dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi"); - if (IS_ERR(dsi->vdd)) { - dev_err(&pdev->dev, "cannot get VDD supply\n"); - return PTR_ERR(dsi->vdd); - } + if (IS_ERR(dsi->vdd)) + return dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), + "cannot get VDD supply\n"); err = tegra_dsi_setup_clocks(dsi); if (err < 0) { -- GitLab From a57bbd606a8a0b67cb4e7a95becba911393e01fb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:08 +0200 Subject: [PATCH 0973/2290] drm/tegra: dsi: Fix some error handling paths in tegra_dsi_probe() [ Upstream commit 830c1ded356369cd1303e8bb87ce3fea6e744de8 ] If an error occurs after calling tegra_output_probe(), tegra_output_remove() should be called as already done in the remove function. Fixes: dec727399a4b ("drm/tegra: Add DSI support") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/16820073278d031f6c474a08d5f22a255158585e.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 54 ++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 6cbba2adb6e5a..815e32e05f600 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1586,44 +1586,58 @@ static int tegra_dsi_probe(struct platform_device *pdev) if (!pdev->dev.pm_domain) { dsi->rst = devm_reset_control_get(&pdev->dev, "dsi"); - if (IS_ERR(dsi->rst)) - return PTR_ERR(dsi->rst); + if (IS_ERR(dsi->rst)) { + err = PTR_ERR(dsi->rst); + goto remove; + } } dsi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(dsi->clk)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), - "cannot get DSI clock\n"); + if (IS_ERR(dsi->clk)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk), + "cannot get DSI clock\n"); + goto remove; + } dsi->clk_lp = devm_clk_get(&pdev->dev, "lp"); - if (IS_ERR(dsi->clk_lp)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), - "cannot get low-power clock\n"); + if (IS_ERR(dsi->clk_lp)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp), + "cannot get low-power clock\n"); + goto remove; + } dsi->clk_parent = devm_clk_get(&pdev->dev, "parent"); - if (IS_ERR(dsi->clk_parent)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), - "cannot get parent clock\n"); + if (IS_ERR(dsi->clk_parent)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent), + "cannot get parent clock\n"); + goto remove; + } dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi"); - if (IS_ERR(dsi->vdd)) - return dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), - "cannot get VDD supply\n"); + if (IS_ERR(dsi->vdd)) { + err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd), + "cannot get VDD supply\n"); + goto remove; + } err = tegra_dsi_setup_clocks(dsi); if (err < 0) { dev_err(&pdev->dev, "cannot setup clocks\n"); - return err; + goto remove; } regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); dsi->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(dsi->regs)) - return PTR_ERR(dsi->regs); + if (IS_ERR(dsi->regs)) { + err = PTR_ERR(dsi->regs); + goto remove; + } dsi->mipi = tegra_mipi_request(&pdev->dev, pdev->dev.of_node); - if (IS_ERR(dsi->mipi)) - return PTR_ERR(dsi->mipi); + if (IS_ERR(dsi->mipi)) { + err = PTR_ERR(dsi->mipi); + goto remove; + } dsi->host.ops = &tegra_dsi_host_ops; dsi->host.dev = &pdev->dev; @@ -1654,6 +1668,8 @@ unregister: mipi_dsi_host_unregister(&dsi->host); mipi_free: tegra_mipi_free(dsi->mipi); +remove: + tegra_output_remove(&dsi->output); return err; } -- GitLab From 806dc32deac2810cca6785d8389f0e2dfdc19b55 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:09 +0200 Subject: [PATCH 0974/2290] drm/tegra: dsi: Fix missing pm_runtime_disable() in the error handling path of tegra_dsi_probe() [ Upstream commit 5286a9fc280c45b6b307ee1b07f7a997e042252c ] If an error occurs after calling pm_runtime_enable(), pm_runtime_disable() should be called as already done in the remove function. Fixes: ef8187d75265 ("drm/tegra: dsi: Implement runtime PM") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/ee4a15c9cd4b574a55cd67c30d2411239ba2cee9.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 815e32e05f600..7bb26655cb3cc 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1665,6 +1665,7 @@ static int tegra_dsi_probe(struct platform_device *pdev) return 0; unregister: + pm_runtime_disable(&pdev->dev); mipi_dsi_host_unregister(&dsi->host); mipi_free: tegra_mipi_free(dsi->mipi); -- GitLab From 8bc95d34408bf87b01d94edcc57a2ea5099f177d Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 10 Jul 2023 11:23:41 +0800 Subject: [PATCH 0975/2290] drm/tegra: hdmi: Convert to devm_platform_ioremap_resource() [ Upstream commit faae5646c13f4697fd2ba29b10e38f9be5aa890a ] Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230710032355.72914-5-frank.li@vivo.com Stable-dep-of: 643ae131b859 ("drm/tegra: hdmi: Fix some error handling paths in tegra_hdmi_probe()") Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/hdmi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index bf240767dad9f..f83d9041327f0 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1776,7 +1776,6 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) static int tegra_hdmi_probe(struct platform_device *pdev) { struct tegra_hdmi *hdmi; - struct resource *regs; int err; hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); @@ -1838,8 +1837,7 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) return err; - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hdmi->regs = devm_ioremap_resource(&pdev->dev, regs); + hdmi->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hdmi->regs)) return PTR_ERR(hdmi->regs); -- GitLab From bb459d437d179fc8cb1f3ab8745b6f5cbde6fc08 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:10 +0200 Subject: [PATCH 0976/2290] drm/tegra: hdmi: Fix some error handling paths in tegra_hdmi_probe() [ Upstream commit 643ae131b8598fb2940c92c7d23fe62823a119c8 ] If an error occurs after calling tegra_output_probe(), tegra_output_remove() should be called as already done in the remove function. Fixes: 59d29c0ec93f ("drm/tegra: Allocate resources at probe time") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/9b7c564eb71977678b20abd73ee52001a51cf327.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/hdmi.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index f83d9041327f0..c66764c0bd250 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1838,12 +1838,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) return err; hdmi->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(hdmi->regs)) - return PTR_ERR(hdmi->regs); + if (IS_ERR(hdmi->regs)) { + err = PTR_ERR(hdmi->regs); + goto remove; + } err = platform_get_irq(pdev, 0); if (err < 0) - return err; + goto remove; hdmi->irq = err; @@ -1852,18 +1854,18 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", hdmi->irq, err); - return err; + goto remove; } platform_set_drvdata(pdev, hdmi); err = devm_pm_runtime_enable(&pdev->dev); if (err) - return err; + goto remove; err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); if (err) - return err; + goto remove; INIT_LIST_HEAD(&hdmi->client.list); hdmi->client.ops = &hdmi_client_ops; @@ -1873,10 +1875,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", err); - return err; + goto remove; } return 0; + +remove: + tegra_output_remove(&hdmi->output); + return err; } static int tegra_hdmi_remove(struct platform_device *pdev) -- GitLab From 29f922ae4a0ac7b2cd5de58c48e5fc9a2d48cdfd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:11 +0200 Subject: [PATCH 0977/2290] drm/tegra: rgb: Fix some error handling paths in tegra_dc_rgb_probe() [ Upstream commit bc456b5d93dbfdbd89f2a036f4f3d8026595f9e4 ] If an error occurs after calling tegra_output_probe(), tegra_output_remove() should be called as already done in the remove function. Fixes: 59d29c0ec93f ("drm/tegra: Allocate resources at probe time") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/0001f61eb89048bc36241629b564195689cf54b6.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/rgb.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index ff8fce36d2aa1..5e95943021887 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -214,26 +214,28 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) rgb->clk = devm_clk_get(dc->dev, NULL); if (IS_ERR(rgb->clk)) { dev_err(dc->dev, "failed to get clock\n"); - return PTR_ERR(rgb->clk); + err = PTR_ERR(rgb->clk); + goto remove; } rgb->clk_parent = devm_clk_get(dc->dev, "parent"); if (IS_ERR(rgb->clk_parent)) { dev_err(dc->dev, "failed to get parent clock\n"); - return PTR_ERR(rgb->clk_parent); + err = PTR_ERR(rgb->clk_parent); + goto remove; } err = clk_set_parent(rgb->clk, rgb->clk_parent); if (err < 0) { dev_err(dc->dev, "failed to set parent clock: %d\n", err); - return err; + goto remove; } rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0"); if (IS_ERR(rgb->pll_d_out0)) { err = PTR_ERR(rgb->pll_d_out0); dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err); - return err; + goto remove; } if (dc->soc->has_pll_d2_out0) { @@ -241,13 +243,17 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) if (IS_ERR(rgb->pll_d2_out0)) { err = PTR_ERR(rgb->pll_d2_out0); dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); - return err; + goto remove; } } dc->rgb = &rgb->output; return 0; + +remove: + tegra_output_remove(&rgb->output); + return err; } int tegra_dc_rgb_remove(struct tegra_dc *dc) -- GitLab From f3f407ccbe84a34de9be3195d22cdd5969f3fd9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:12 +0200 Subject: [PATCH 0978/2290] drm/tegra: rgb: Fix missing clk_put() in the error handling paths of tegra_dc_rgb_probe() [ Upstream commit 45c8034db47842b25a3ab6139d71e13b4e67b9b3 ] If clk_get_sys(..., "pll_d2_out0") fails, the clk_get_sys() call must be undone. Add the missing clk_put and a new 'put_pll_d_out0' label in the error handling path, and use it. Fixes: 0c921b6d4ba0 ("drm/tegra: dc: rgb: Allow changing PLLD rate on Tegra30+") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/0182895ead4e4730426616b0d9995954c960b634.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/rgb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 5e95943021887..86e55e5d12b39 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -243,7 +243,7 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) if (IS_ERR(rgb->pll_d2_out0)) { err = PTR_ERR(rgb->pll_d2_out0); dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); - goto remove; + goto put_pll; } } @@ -251,6 +251,8 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) return 0; +put_pll: + clk_put(rgb->pll_d_out0); remove: tegra_output_remove(&rgb->output); return err; -- GitLab From c994fed05c2715e5bd6821a0a6c472aaeccf1f9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Sep 2023 17:22:13 +0200 Subject: [PATCH 0979/2290] drm/tegra: output: Fix missing i2c_put_adapter() in the error handling paths of tegra_output_probe() [ Upstream commit 2db4578ef6ffb2b52115ca0ebf897b60ec559556 ] If an error occurs after a successful of_get_i2c_adapter_by_node() call, it should be undone by a corresponding i2c_put_adapter(). Add the missing i2c_put_adapter() call. Fixes: 9be7d864cf07 ("drm/tegra: Implement panel support") Signed-off-by: Christophe JAILLET Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/b38604178991e1f08b2cda219103be266be2d680.1693667005.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/output.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 47d26b5d99456..7ccd010a821b7 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -139,8 +139,10 @@ int tegra_output_probe(struct tegra_output *output) GPIOD_IN, "HDMI hotplug detect"); if (IS_ERR(output->hpd_gpio)) { - if (PTR_ERR(output->hpd_gpio) != -ENOENT) - return PTR_ERR(output->hpd_gpio); + if (PTR_ERR(output->hpd_gpio) != -ENOENT) { + err = PTR_ERR(output->hpd_gpio); + goto put_i2c; + } output->hpd_gpio = NULL; } @@ -149,7 +151,7 @@ int tegra_output_probe(struct tegra_output *output) err = gpiod_to_irq(output->hpd_gpio); if (err < 0) { dev_err(output->dev, "gpiod_to_irq(): %d\n", err); - return err; + goto put_i2c; } output->hpd_irq = err; @@ -162,7 +164,7 @@ int tegra_output_probe(struct tegra_output *output) if (err < 0) { dev_err(output->dev, "failed to request IRQ#%u: %d\n", output->hpd_irq, err); - return err; + goto put_i2c; } output->connector.polled = DRM_CONNECTOR_POLL_HPD; @@ -176,6 +178,12 @@ int tegra_output_probe(struct tegra_output *output) } return 0; + +put_i2c: + if (output->ddc) + i2c_put_adapter(output->ddc); + + return err; } void tegra_output_remove(struct tegra_output *output) -- GitLab From e6ed73bb8e4bbae4ad85b710bc99cb1aa9d65e87 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Fri, 22 Dec 2023 18:41:54 +0100 Subject: [PATCH 0980/2290] drm/rockchip: inno_hdmi: Fix video timing [ Upstream commit 47a145c03484d33e65d773169d5ca1b9fe2a492e ] The controller wants the difference between *total and *sync_start in the HDMI_VIDEO_EXT_*DELAY registers. Otherwise the signal is very unstable for certain non-VIC modes. See downstream commit [0]. [0] https://github.com/rockchip-linux/kernel/commit/8eb559f2502c Fixes: 412d4ae6b7a5 ("drm/rockchip: hdmi: add Innosilicon HDMI support") Co-developed-by: Zheng Yang Signed-off-by: Zheng Yang Signed-off-by: Alex Bee Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231222174220.55249-4-knaerzche@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/inno_hdmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index f51774866f412..8f230f4c01bc3 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -411,7 +411,7 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_L, value & 0xFF); hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_H, (value >> 8) & 0xFF); - value = mode->hsync_start - mode->hdisplay; + value = mode->htotal - mode->hsync_start; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_L, value & 0xFF); hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_H, (value >> 8) & 0xFF); @@ -426,7 +426,7 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, value = mode->vtotal - mode->vdisplay; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VBLANK, value & 0xFF); - value = mode->vsync_start - mode->vdisplay; + value = mode->vtotal - mode->vsync_start; hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VDELAY, value & 0xFF); value = mode->vsync_end - mode->vsync_start; -- GitLab From 758629d348b981f77385abd1bca6a026fb972458 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Wed, 8 Nov 2023 11:36:20 -0500 Subject: [PATCH 0981/2290] drm: Don't treat 0 as -1 in drm_fixp2int_ceil [ Upstream commit cf8837d7204481026335461629b84ac7f4538fa5 ] Unit testing this in VKMS shows that passing 0 into this function returns -1, which is highly counter- intuitive. Fix it by checking whether the input is >= 0 instead of > 0. Fixes: 64566b5e767f ("drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil") Signed-off-by: Harry Wentland Reviewed-by: Simon Ser Reviewed-by: Melissa Wen Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/20231108163647.106853-2-harry.wentland@amd.com Signed-off-by: Sasha Levin --- include/drm/drm_fixed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 03cb890690e83..6230088428cdb 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -94,7 +94,7 @@ static inline int drm_fixp2int_round(s64 a) static inline int drm_fixp2int_ceil(s64 a) { - if (a > 0) + if (a >= 0) return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); else return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); -- GitLab From 40624af6674745e174c754a20d7c53c250e65e7a Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 4 Dec 2023 17:14:16 +0800 Subject: [PATCH 0982/2290] drm/vmwgfx: fix a memleak in vmw_gmrid_man_get_node [ Upstream commit 89709105a6091948ffb6ec2427954cbfe45358ce ] When ida_alloc_max fails, resources allocated before should be freed, including *res allocated by kmalloc and ttm_resource_init. Fixes: d3bcb4b02fe9 ("drm/vmwgfx: switch the TTM backends to self alloc") Signed-off-by: Zhipeng Lu Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20231204091416.3308430-1-alexious@zju.edu.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 60e3cc537f365..b9e5c8cd31001 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -65,8 +65,11 @@ static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man, ttm_resource_init(bo, place, *res); id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); - if (id < 0) + if (id < 0) { + ttm_resource_fini(man, *res); + kfree(*res); return id; + } spin_lock(&gman->lock); -- GitLab From fe790f340eb746202c6a6b274e39989b97d23d9e Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 20 Nov 2023 13:29:48 +0100 Subject: [PATCH 0983/2290] drm/rockchip: lvds: do not overwrite error code [ Upstream commit 79b09453c4e369ca81cfb670d0136d089e3b92f0 ] ret variable stores the return value of drm_of_find_panel_or_bridge which can return error codes different from EPROBE_DEFER. Therefore, let's just return that error code instead of forcing it to EPROBE_DEFER. Fixes: 34cc0aa25456 ("drm/rockchip: Add support for Rockchip Soc LVDS") Cc: Quentin Schulz Signed-off-by: Quentin Schulz Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231120-rk-lvds-defer-msg-v2-1-9c59a5779cf9@theobroma-systems.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/rockchip_lvds.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index 68f6ebb33460b..1fde888cdd827 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -578,7 +578,6 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, goto err_put_port; } else if (ret) { DRM_DEV_ERROR(dev, "failed to find panel and bridge node\n"); - ret = -EPROBE_DEFER; goto err_put_port; } if (lvds->panel) -- GitLab From 06e38277151e5301b671614d273b8dc2d0486e5f Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 20 Nov 2023 13:29:49 +0100 Subject: [PATCH 0984/2290] drm/rockchip: lvds: do not print scary message when probing defer [ Upstream commit 52d11c863ac92e36a0365249f7f6d27ac48c78bc ] This scary message can misled the user into thinking something bad has happened and needs to be fixed, however it could simply be part of a normal boot process where EPROBE_DEFER is taken into account. Therefore, let's use dev_err_probe so that this message doesn't get shown (by default) when the return code is EPROBE_DEFER. Fixes: 34cc0aa25456 ("drm/rockchip: Add support for Rockchip Soc LVDS") Cc: Quentin Schulz Signed-off-by: Quentin Schulz Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231120-rk-lvds-defer-msg-v2-2-9c59a5779cf9@theobroma-systems.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index 1fde888cdd827..eb4a108c5bd2a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -577,7 +577,7 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, ret = -EINVAL; goto err_put_port; } else if (ret) { - DRM_DEV_ERROR(dev, "failed to find panel and bridge node\n"); + dev_err_probe(dev, ret, "failed to find panel and bridge node\n"); goto err_put_port; } if (lvds->panel) -- GitLab From 680c94312e8665da6517e4f4b8e4cf8e0ded64c7 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 20 Dec 2023 14:13:11 -0800 Subject: [PATCH 0985/2290] drm/panel-edp: use put_sync in unprepare [ Upstream commit 49ddab089611ae5ddd0201ddbbf633da75bfcc25 ] Some edp panel requires T10 (Delay from end of valid video data transmitted by the Source device to power-off) less than 500ms. Using autosuspend with delay set as 1000 violates this requirement. Use put_sync_suspend in unprepare to meet the spec. For other cases (such as getting EDID), it still uses autosuspend. Suggested-by: Douglas Anderson Fixes: 3235b0f20a0a ("drm/panel: panel-simple: Use runtime pm to avoid excessive unprepare / prepare") Signed-off-by: Hsin-Yi Wang Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231220221418.2610185-1-hsinyi@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-edp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 42584d8a9aeb6..bfcddd4aa9322 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -413,8 +413,7 @@ static int panel_edp_unprepare(struct drm_panel *panel) if (!p->prepared) return 0; - pm_runtime_mark_last_busy(panel->dev); - ret = pm_runtime_put_autosuspend(panel->dev); + ret = pm_runtime_put_sync_suspend(panel->dev); if (ret < 0) return ret; p->prepared = false; -- GitLab From f6d51a91b41704704e395de6839c667b0f810bbf Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 15:13:28 +0800 Subject: [PATCH 0986/2290] drm/lima: fix a memleak in lima_heap_alloc [ Upstream commit 04ae3eb470e52a3c41babe85ff8cee195e4dcbea ] When lima_vm_map_bo fails, the resources need to be deallocated, or there will be memleaks. Fixes: 6aebc51d7aef ("drm/lima: support heap buffer creation") Signed-off-by: Zhipeng Lu Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20240117071328.3811480-1-alexious@zju.edu.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/lima/lima_gem.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 0f1ca0b0db495..d72c5bf4e5ac1 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -75,29 +75,34 @@ int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm) } else { bo->base.sgt = kmalloc(sizeof(*bo->base.sgt), GFP_KERNEL); if (!bo->base.sgt) { - sg_free_table(&sgt); - return -ENOMEM; + ret = -ENOMEM; + goto err_out0; } } ret = dma_map_sgtable(dev, &sgt, DMA_BIDIRECTIONAL, 0); - if (ret) { - sg_free_table(&sgt); - kfree(bo->base.sgt); - bo->base.sgt = NULL; - return ret; - } + if (ret) + goto err_out1; *bo->base.sgt = sgt; if (vm) { ret = lima_vm_map_bo(vm, bo, old_size >> PAGE_SHIFT); if (ret) - return ret; + goto err_out2; } bo->heap_size = new_size; return 0; + +err_out2: + dma_unmap_sgtable(dev, &sgt, DMA_BIDIRECTIONAL, 0); +err_out1: + kfree(bo->base.sgt); + bo->base.sgt = NULL; +err_out0: + sg_free_table(&sgt); + return ret; } int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file, -- GitLab From 1eb749a9c30585a5758e7a10510ad3b5ca29d6ed Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 19 Dec 2023 05:07:21 +0200 Subject: [PATCH 0987/2290] ASoC: amd: acp: Add missing error handling in sof-mach [ Upstream commit d0ada20279db2649a7549a2b8a4a3379c59f238d ] Handle potential acp_sofdsp_dai_links_create() errors in ACP SOF machine driver's probe function. Note there is no need for an undo. While at it, switch to dev_err_probe(). Fixes: 9f84940f5004 ("ASoC: amd: acp: Add SOF audio support on Chrome board") Signed-off-by: Cristian Ciocaltea Reviewed-by: Emil Velikov Link: https://msgid.link/r/20231219030728.2431640-4-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/acp/acp-sof-mach.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index f19f064a75272..972600d271586 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -114,16 +114,14 @@ static int acp_sof_probe(struct platform_device *pdev) card->num_controls = ARRAY_SIZE(acp_controls); card->drvdata = (struct acp_card_drvdata *)pdev->id_entry->driver_data; - acp_sofdsp_dai_links_create(card); + ret = acp_sofdsp_dai_links_create(card); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to create DAI links\n"); ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret) { - dev_err(&pdev->dev, - "devm_snd_soc_register_card(%s) failed: %d\n", - card->name, ret); - return ret; - } - + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register card(%s)\n", card->name); return 0; } -- GitLab From b17195f1e01598045756038c7f4a0683a7084030 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 12 Jan 2024 09:32:56 +0000 Subject: [PATCH 0988/2290] dmaengine: tegra210-adma: Update dependency to ARCH_TEGRA [ Upstream commit 33b7db45533af240fe44e809f9dc4d604cf82d07 ] Update the architecture dependency to be the generic Tegra because the driver works on the four latest Tegra generations not just T210, if you build a kernel with a specific ARCH_TEGRA_xxx_SOC option that excludes 210 you don't get this driver. Fixes: 433de642a76c9 ("dmaengine: tegra210-adma: add support for Tegra186/Tegra194") Signed-off-by: Peter Robinson Cc: Jon Hunter Cc: Thierry Reding Cc: Sameer Pujar Cc: Laxman Dewangan Reviewed-by: Jon Hunter Link: https://lore.kernel.org/r/20240112093310.329642-2-pbrobinson@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 81de833ccd041..66ef0a1114845 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -665,16 +665,16 @@ config TEGRA20_APB_DMA config TEGRA210_ADMA tristate "NVIDIA Tegra210 ADMA support" - depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST) + depends on (ARCH_TEGRA || COMPILE_TEST) select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support for the NVIDIA Tegra210 ADMA controller driver. The - DMA controller has multiple DMA channels and is used to service - various audio clients in the Tegra210 audio processing engine - (APE). This DMA controller transfers data from memory to - peripheral and vice versa. It does not support memory to - memory data transfer. + Support for the NVIDIA Tegra210/Tegra186/Tegra194/Tegra234 ADMA + controller driver. The DMA controller has multiple DMA channels + and is used to service various audio clients in the Tegra210 + audio processing engine (APE). This DMA controller transfers + data from memory to peripheral and vice versa. It does not + support memory to memory data transfer. config TIMB_DMA tristate "Timberdale FPGA DMA support" -- GitLab From 8ba8db9786b55047df5ad3db3e01dd886687a77d Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:01:11 +0100 Subject: [PATCH 0989/2290] media: tc358743: register v4l2 async device only after successful setup [ Upstream commit 87399f1ff92203d65f1febf5919429f4bb613a02 ] Ensure the device has been setup correctly before registering the v4l2 async device, thus allowing userspace to access. Signed-off-by: Alexander Stein Reviewed-by: Robert Foss Fixes: 4c5211a10039 ("[media] tc358743: register v4l2 asynchronous subdevice") Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240110090111.458115-1-alexander.stein@ew.tq-group.com Signed-off-by: Sasha Levin --- drivers/media/i2c/tc358743.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index 200841c1f5cf0..68628ccecd161 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -2094,9 +2094,6 @@ static int tc358743_probe(struct i2c_client *client) state->mbus_fmt_code = MEDIA_BUS_FMT_RGB888_1X24; sd->dev = &client->dev; - err = v4l2_async_register_subdev(sd); - if (err < 0) - goto err_hdl; mutex_init(&state->confctl_mutex); @@ -2154,6 +2151,10 @@ static int tc358743_probe(struct i2c_client *client) if (err) goto err_work_queues; + err = v4l2_async_register_subdev(sd); + if (err < 0) + goto err_work_queues; + v4l2_info(sd, "%s found @ 0x%x (%s)\n", client->name, client->addr << 1, client->adapter->name); -- GitLab From 43f4364c8f4ebf98da66acf11b65c44289d02deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 18 Jan 2024 13:08:15 +0200 Subject: [PATCH 0990/2290] PCI/DPC: Print all TLP Prefixes, not just the first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6568d82512b0a64809acff3d7a747362fa4288c8 ] The TLP Prefix Log Register consists of multiple DWORDs (PCIe r6.1 sec 7.9.14.13) but the loop in dpc_process_rp_pio_error() keeps reading from the first DWORD, so we print only the first PIO TLP Prefix (duplicated several times), and we never print the second, third, etc., Prefixes. Add the iteration count based offset calculation into the config read. Fixes: f20c4ea49ec4 ("PCI/DPC: Add eDPC support") Link: https://lore.kernel.org/r/20240118110815.3867-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: add user-visible details to commit log] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index a5d7c69b764e0..08800282825e1 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -231,7 +231,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix); pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); } clear_status: -- GitLab From 845a478304f0faadecae77c821a1574efdfe92dd Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Fri, 19 Jan 2024 04:03:02 +0000 Subject: [PATCH 0991/2290] perf record: Fix possible incorrect free in record__switch_output() [ Upstream commit aff10a165201f6f60cff225083ce301ad3f5d8f1 ] perf_data__switch() may not assign a legal value to 'new_filename'. In this case, 'new_filename' uses the on-stack value, which may cause a incorrect free and unexpected result. Fixes: 03724b2e9c45 ("perf record: Allow to limit number of reported perf.data files") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20240119040304.3708522-2-yangjihong1@huawei.com Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7314183cdcb6c..b9b0fda8374e2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1785,8 +1785,8 @@ static int record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; + char *new_filename = NULL; int fd, err; - char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; -- GitLab From fa8810aa5ac3e21a45fd2b28d47e49683f29635f Mon Sep 17 00:00:00 2001 From: Mikhail Khvainitski Date: Sat, 23 Dec 2023 21:12:13 +0200 Subject: [PATCH 0992/2290] HID: lenovo: Add middleclick_workaround sysfs knob for cptkbd [ Upstream commit 2814646f76f8518326964f12ff20aaee70ba154d ] Previous attempt to autodetect well-behaving patched firmware introduced in commit 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") has shown that there are false-positives on original firmware (on both 1st gen and 2nd gen keyboards) which causes the middle button click workaround to be mistakenly disabled. This commit adds explicit parameter to sysfs to control this workaround. Fixes: 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") Fixes: 43527a0094c1 ("HID: lenovo: Restrict detection of patched firmware only to USB cptkbd") Signed-off-by: Mikhail Khvainitski Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-lenovo.c | 57 +++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 149a3c74346b4..f86c1ea83a037 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -54,10 +54,10 @@ struct lenovo_drvdata { /* 0: Up * 1: Down (undecided) * 2: Scrolling - * 3: Patched firmware, disable workaround */ u8 middlebutton_state; bool fn_lock; + bool middleclick_workaround_cptkbd; }; #define map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) @@ -621,6 +621,36 @@ static ssize_t attr_sensitivity_store_cptkbd(struct device *dev, return count; } +static ssize_t attr_middleclick_workaround_show_cptkbd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hid_device *hdev = to_hid_device(dev); + struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + + return snprintf(buf, PAGE_SIZE, "%u\n", + cptkbd_data->middleclick_workaround_cptkbd); +} + +static ssize_t attr_middleclick_workaround_store_cptkbd(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct hid_device *hdev = to_hid_device(dev); + struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + int value; + + if (kstrtoint(buf, 10, &value)) + return -EINVAL; + if (value < 0 || value > 1) + return -EINVAL; + + cptkbd_data->middleclick_workaround_cptkbd = !!value; + + return count; +} + static struct device_attribute dev_attr_fn_lock = __ATTR(fn_lock, S_IWUSR | S_IRUGO, @@ -632,10 +662,16 @@ static struct device_attribute dev_attr_sensitivity_cptkbd = attr_sensitivity_show_cptkbd, attr_sensitivity_store_cptkbd); +static struct device_attribute dev_attr_middleclick_workaround_cptkbd = + __ATTR(middleclick_workaround, S_IWUSR | S_IRUGO, + attr_middleclick_workaround_show_cptkbd, + attr_middleclick_workaround_store_cptkbd); + static struct attribute *lenovo_attributes_cptkbd[] = { &dev_attr_fn_lock.attr, &dev_attr_sensitivity_cptkbd.attr, + &dev_attr_middleclick_workaround_cptkbd.attr, NULL }; @@ -686,23 +722,7 @@ static int lenovo_event_cptkbd(struct hid_device *hdev, { struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); - if (cptkbd_data->middlebutton_state != 3) { - /* REL_X and REL_Y events during middle button pressed - * are only possible on patched, bug-free firmware - * so set middlebutton_state to 3 - * to never apply workaround anymore - */ - if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD && - cptkbd_data->middlebutton_state == 1 && - usage->type == EV_REL && - (usage->code == REL_X || usage->code == REL_Y)) { - cptkbd_data->middlebutton_state = 3; - /* send middle button press which was hold before */ - input_event(field->hidinput->input, - EV_KEY, BTN_MIDDLE, 1); - input_sync(field->hidinput->input); - } - + if (cptkbd_data->middleclick_workaround_cptkbd) { /* "wheel" scroll events */ if (usage->type == EV_REL && (usage->code == REL_WHEEL || usage->code == REL_HWHEEL)) { @@ -1166,6 +1186,7 @@ static int lenovo_probe_cptkbd(struct hid_device *hdev) cptkbd_data->middlebutton_state = 0; cptkbd_data->fn_lock = true; cptkbd_data->sensitivity = 0x05; + cptkbd_data->middleclick_workaround_cptkbd = true; lenovo_features_set_cptkbd(hdev); ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_cptkbd); -- GitLab From d346b3e5b25c95d504478507eb867cd3818775ab Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 23 Jan 2024 20:18:07 +0530 Subject: [PATCH 0993/2290] drm/amd/display: Fix a potential buffer overflow in 'dp_dsc_clock_en_read()' [ Upstream commit 4b09715f1504f1b6e8dff0e9643630610bc05141 ] Tell snprintf() to store at most 10 bytes in the output buffer instead of 30. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1508 dp_dsc_clock_en_read() error: snprintf() is printing too much 30 vs 10 Fixes: c06e09b76639 ("drm/amd/display: Add DSC parameters logging to debugfs") Cc: Alex Hung Cc: Qingqing Zhuo Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index ee242d9d8b060..ff7dd17ad0763 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1358,7 +1358,7 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, const uint32_t rd_buf_size = 10; struct pipe_ctx *pipe_ctx; ssize_t result = 0; - int i, r, str_len = 30; + int i, r, str_len = 10; rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL); -- GitLab From 29fde8895b2fcc33f44aea28c644ce2d9b62f9e0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Jan 2024 21:16:04 +0530 Subject: [PATCH 0994/2290] drm/amd/display: Fix potential NULL pointer dereferences in 'dcn10_set_output_transfer_func()' [ Upstream commit 9ccfe80d022df7c595f1925afb31de2232900656 ] The 'stream' pointer is used in dcn10_set_output_transfer_func() before the check if 'stream' is NULL. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn10/dcn10_hwseq.c:1892 dcn10_set_output_transfer_func() warn: variable dereferenced before check 'stream' (see line 1875) Fixes: ddef02de0d71 ("drm/amd/display: add null checks before logging") Cc: Wyatt Wood Cc: Anthony Koo Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Anthony Koo Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 009b5861a3fec..d6c5d48c878ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1854,6 +1854,9 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, { struct dpp *dpp = pipe_ctx->plane_res.dpp; + if (!stream) + return false; + if (dpp == NULL) return false; @@ -1876,8 +1879,8 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, } else dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_BYPASS); - if (stream != NULL && stream->ctx != NULL && - stream->out_transfer_func != NULL) { + if (stream->ctx && + stream->out_transfer_func) { log_tf(stream->ctx, stream->out_transfer_func, dpp->regamma_params.hw_points_num); -- GitLab From a19403d20ed86930fbcf14434ff87737691f28ef Mon Sep 17 00:00:00 2001 From: Linh Phung Date: Thu, 2 Feb 2023 01:03:09 +0000 Subject: [PATCH 0995/2290] pinctrl: renesas: r8a779g0: Add Audio SSI pins, groups, and functions [ Upstream commit b37d57e1daccbc1a0393d9207d5c48f9181fe85a ] Add pins, groups, and functions for the Serial Sound Interface (SSI) on the Renesas R-Car V4H (R8A779G0) SoC. Signed-off-by: Linh Phung Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/87bkmcang2.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: 68540257cdf1 ("pinctrl: renesas: r8a779g0: Add missing SCIF_CLK2 pin group/function") Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/pfc-r8a779g0.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/pinctrl/renesas/pfc-r8a779g0.c b/drivers/pinctrl/renesas/pfc-r8a779g0.c index 43a63a21a6fb5..14774163df354 100644 --- a/drivers/pinctrl/renesas/pfc-r8a779g0.c +++ b/drivers/pinctrl/renesas/pfc-r8a779g0.c @@ -2360,6 +2360,22 @@ static const unsigned int scif_clk_mux[] = { SCIF_CLK_MARK, }; +/* - SSI ------------------------------------------------- */ +static const unsigned int ssi_data_pins[] = { + /* SSI_SD */ + RCAR_GP_PIN(1, 20), +}; +static const unsigned int ssi_data_mux[] = { + SSI_SD_MARK, +}; +static const unsigned int ssi_ctrl_pins[] = { + /* SSI_SCK, SSI_WS */ + RCAR_GP_PIN(1, 18), RCAR_GP_PIN(1, 19), +}; +static const unsigned int ssi_ctrl_mux[] = { + SSI_SCK_MARK, SSI_WS_MARK, +}; + /* - TPU ------------------------------------------------------------------- */ static const unsigned int tpu_to0_pins[] = { /* TPU0TO0 */ @@ -2652,6 +2668,9 @@ static const struct sh_pfc_pin_group pinmux_groups[] = { SH_PFC_PIN_GROUP(scif4_ctrl), SH_PFC_PIN_GROUP(scif_clk), + SH_PFC_PIN_GROUP(ssi_data), + SH_PFC_PIN_GROUP(ssi_ctrl), + SH_PFC_PIN_GROUP(tpu_to0), /* suffix might be updated */ SH_PFC_PIN_GROUP(tpu_to0_a), /* suffix might be updated */ SH_PFC_PIN_GROUP(tpu_to1), /* suffix might be updated */ @@ -2964,6 +2983,11 @@ static const char * const scif_clk_groups[] = { "scif_clk", }; +static const char * const ssi_groups[] = { + "ssi_data", + "ssi_ctrl", +}; + static const char * const tpu_groups[] = { /* suffix might be updated */ "tpu_to0", @@ -3045,6 +3069,8 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scif4), SH_PFC_FUNCTION(scif_clk), + SH_PFC_FUNCTION(ssi), + SH_PFC_FUNCTION(tpu), SH_PFC_FUNCTION(tsn0), -- GitLab From 29eaa9246b0da536912125f32e47f69a62097d6a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 18 Jan 2024 17:32:36 +0100 Subject: [PATCH 0996/2290] pinctrl: renesas: r8a779g0: Add missing SCIF_CLK2 pin group/function [ Upstream commit 68540257cdf1d07ff8a649aa94c21c5804bbb9b0 ] R-Car V4H actually has two SCIF_CLK pins. The second pin provides the SCIF_CLK signal for HSCIF2 and SCIF4. Fixes: 050442ae4c74f830 ("pinctrl: renesas: r8a779g0: Add pins, groups and functions") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/6352ec9b63fdd38c2c70d8d203e46f21fbfeccdc.1705589612.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/pfc-r8a779g0.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/pinctrl/renesas/pfc-r8a779g0.c b/drivers/pinctrl/renesas/pfc-r8a779g0.c index 14774163df354..acf7664ea835b 100644 --- a/drivers/pinctrl/renesas/pfc-r8a779g0.c +++ b/drivers/pinctrl/renesas/pfc-r8a779g0.c @@ -2360,6 +2360,14 @@ static const unsigned int scif_clk_mux[] = { SCIF_CLK_MARK, }; +static const unsigned int scif_clk2_pins[] = { + /* SCIF_CLK2 */ + RCAR_GP_PIN(8, 11), +}; +static const unsigned int scif_clk2_mux[] = { + SCIF_CLK2_MARK, +}; + /* - SSI ------------------------------------------------- */ static const unsigned int ssi_data_pins[] = { /* SSI_SD */ @@ -2667,6 +2675,7 @@ static const struct sh_pfc_pin_group pinmux_groups[] = { SH_PFC_PIN_GROUP(scif4_clk), SH_PFC_PIN_GROUP(scif4_ctrl), SH_PFC_PIN_GROUP(scif_clk), + SH_PFC_PIN_GROUP(scif_clk2), SH_PFC_PIN_GROUP(ssi_data), SH_PFC_PIN_GROUP(ssi_ctrl), @@ -2983,6 +2992,10 @@ static const char * const scif_clk_groups[] = { "scif_clk", }; +static const char * const scif_clk2_groups[] = { + "scif_clk2", +}; + static const char * const ssi_groups[] = { "ssi_data", "ssi_ctrl", @@ -3068,6 +3081,7 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scif3), SH_PFC_FUNCTION(scif4), SH_PFC_FUNCTION(scif_clk), + SH_PFC_FUNCTION(scif_clk2), SH_PFC_FUNCTION(ssi), -- GitLab From 9310d3de984cf2a51eb966bb0bbd8fd9711c16e4 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Wed, 24 Jan 2024 19:38:56 -0600 Subject: [PATCH 0997/2290] clk: samsung: exynos850: Propagate SPI IPCLK rate change [ Upstream commit 67c15187d4910ee353374676d4dddf09d8cb227e ] When SPI transfer is being prepared, the spi-s3c64xx driver will call clk_set_rate() to change the rate of SPI source clock (IPCLK). But IPCLK is a gate (leaf) clock, so it must propagate the rate change up the clock tree, so that corresponding DIV clocks can actually change their divider values. Add CLK_SET_RATE_PARENT flag to corresponding clocks for all SPI instances in Exynos850 (spi_0, spi_1 and spi_2) to make it possible. This change involves next clocks: usi_spi_0: Clock Block Div range -------------------------------------------- gout_spi0_ipclk CMU_PERI - dout_peri_spi0 CMU_PERI /1..32 mout_peri_spi_user CMU_PERI - dout_peri_ip CMU_TOP /1..16 usi_cmgp0: Clock Block Div range -------------------------------------------- gout_cmgp_usi0_ipclk CMU_CMGP - dout_cmgp_usi0 CMU_CMGP /1..32 mout_cmgp_usi0 CMU_CMGP - gout_clkcmu_cmgp_bus CMU_APM - dout_apm_bus CMU_APM /1..8 usi_cmgp1: Clock Block Div range -------------------------------------------- gout_cmgp_usi1_ipclk CMU_CMGP - dout_cmgp_usi1 CMU_CMGP /1..32 mout_cmgp_usi1 CMU_CMGP - gout_clkcmu_cmgp_bus CMU_APM - dout_apm_bus CMU_APM /1..8 With input clock of 400 MHz, this scheme provides next IPCLK rate range, for each SPI block: SPI0: 781 kHz ... 400 MHz SPI1/2: 1.6 MHz ... 400 MHz Accounting for internal /4 divider in SPI blocks, and because the max SPI frequency is limited at 50 MHz, it gives us next SPI SCK rates: SPI0: 200 kHz ... 49.9 MHz SPI1/2: 400 kHz ... 49.9 MHz Which should cover all possible applications of SPI bus. Of course, setting SPI frequency to values as low as 500 kHz will also affect the common bus dividers (dout_apm_bus or dout_peri_ip), which in turn effectively lowers the rates for all leaf bus clocks derived from those dividers, like HSI2C and I3C clocks. But at least it gives the board designer a choice, whether to keep all clocks (SPI/HSI2C/I3C) at high frequencies, or make all those clocks have lower frequencies. Not propagating the rate change to those common dividers would limit this choice to "only high frequencies are allowed for SPI/HSI2C/I3C" option, making the common dividers useless. This decision follows the "Worse is better" approach, relying on the users/engineers to know the system internals when working with such low-level features, instead of trying to account for all possible use-cases. Fixes: 7dd05578198b ("clk: samsung: Introduce Exynos850 clock driver") Signed-off-by: Sam Protsenko Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240125013858.3986-2-semen.protsenko@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos850.c | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos850.c b/drivers/clk/samsung/clk-exynos850.c index 541761e96aeb6..87e463ad42741 100644 --- a/drivers/clk/samsung/clk-exynos850.c +++ b/drivers/clk/samsung/clk-exynos850.c @@ -572,7 +572,7 @@ static const struct samsung_div_clock apm_div_clks[] __initconst = { static const struct samsung_gate_clock apm_gate_clks[] __initconst = { GATE(CLK_GOUT_CLKCMU_CMGP_BUS, "gout_clkcmu_cmgp_bus", "dout_apm_bus", - CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, 0, 0), + CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CLKCMU_CHUB_BUS, "gout_clkcmu_chub_bus", "mout_clkcmu_chub_bus", CLK_CON_GAT_GATE_CLKCMU_CHUB_BUS, 21, 0, 0), @@ -936,19 +936,19 @@ static const struct samsung_fixed_rate_clock cmgp_fixed_clks[] __initconst = { static const struct samsung_mux_clock cmgp_mux_clks[] __initconst = { MUX(CLK_MOUT_CMGP_ADC, "mout_cmgp_adc", mout_cmgp_adc_p, CLK_CON_MUX_CLK_CMGP_ADC, 0, 1), - MUX(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p, - CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1), - MUX(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p, - CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1), + MUX_F(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p, + CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1, CLK_SET_RATE_PARENT, 0), + MUX_F(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p, + CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_div_clock cmgp_div_clks[] __initconst = { DIV(CLK_DOUT_CMGP_ADC, "dout_cmgp_adc", "gout_clkcmu_cmgp_bus", CLK_CON_DIV_DIV_CLK_CMGP_ADC, 0, 4), - DIV(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0", - CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5), - DIV(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1", - CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5), + DIV_F(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0", + CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5, CLK_SET_RATE_PARENT, 0), + DIV_F(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1", + CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = { @@ -963,12 +963,12 @@ static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = { "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_GPIO_PCLK, 21, CLK_IGNORE_UNUSED, 0), GATE(CLK_GOUT_CMGP_USI0_IPCLK, "gout_cmgp_usi0_ipclk", "dout_cmgp_usi0", - CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CMGP_USI0_PCLK, "gout_cmgp_usi0_pclk", "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_PCLK, 21, 0, 0), GATE(CLK_GOUT_CMGP_USI1_IPCLK, "gout_cmgp_usi1_ipclk", "dout_cmgp_usi1", - CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_CMGP_USI1_PCLK, "gout_cmgp_usi1_pclk", "gout_clkcmu_cmgp_bus", CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_PCLK, 21, 0, 0), @@ -1409,8 +1409,9 @@ static const struct samsung_mux_clock peri_mux_clks[] __initconst = { mout_peri_uart_user_p, PLL_CON0_MUX_CLKCMU_PERI_UART_USER, 4, 1), MUX(CLK_MOUT_PERI_HSI2C_USER, "mout_peri_hsi2c_user", mout_peri_hsi2c_user_p, PLL_CON0_MUX_CLKCMU_PERI_HSI2C_USER, 4, 1), - MUX(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", mout_peri_spi_user_p, - PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1), + MUX_F(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", + mout_peri_spi_user_p, PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1, + CLK_SET_RATE_PARENT, 0), }; static const struct samsung_div_clock peri_div_clks[] __initconst = { @@ -1420,8 +1421,8 @@ static const struct samsung_div_clock peri_div_clks[] __initconst = { CLK_CON_DIV_DIV_CLK_PERI_HSI2C_1, 0, 5), DIV(CLK_DOUT_PERI_HSI2C2, "dout_peri_hsi2c2", "gout_peri_hsi2c2", CLK_CON_DIV_DIV_CLK_PERI_HSI2C_2, 0, 5), - DIV(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user", - CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5), + DIV_F(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user", + CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_gate_clock peri_gate_clks[] __initconst = { @@ -1463,7 +1464,7 @@ static const struct samsung_gate_clock peri_gate_clks[] __initconst = { "mout_peri_bus_user", CLK_CON_GAT_GOUT_PERI_PWM_MOTOR_PCLK, 21, 0, 0), GATE(CLK_GOUT_SPI0_IPCLK, "gout_spi0_ipclk", "dout_peri_spi0", - CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, 0, 0), + CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, CLK_SET_RATE_PARENT, 0), GATE(CLK_GOUT_SPI0_PCLK, "gout_spi0_pclk", "mout_peri_bus_user", CLK_CON_GAT_GOUT_PERI_SPI_0_PCLK, 21, 0, 0), GATE(CLK_GOUT_SYSREG_PERI_PCLK, "gout_sysreg_peri_pclk", -- GitLab From 83fe9c3f445d7b94dc4ff61f454618a5bdd7bf38 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 27 Jan 2024 02:57:56 +0000 Subject: [PATCH 0998/2290] perf evsel: Fix duplicate initialization of data->id in evsel__parse_sample() [ Upstream commit 4962aec0d684c8edb14574ccd0da53e4926ff834 ] data->id has been initialized at line 2362, remove duplicate initialization. Fixes: 3ad31d8a0df2 ("perf evsel: Centralize perf_sample initialization") Signed-off-by: Yang Jihong Reviewed-by: Arnaldo Carvalho de Melo Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240127025756.4041808-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin --- tools/perf/util/evsel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde35078..7db35dbdfcefe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2375,7 +2375,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->period = evsel->core.attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; data->misc = event->header.misc; - data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; data->vcpu = -1; -- GitLab From 7ae1b0dc12ec407f12f80b49d22c6ad2308e2202 Mon Sep 17 00:00:00 2001 From: Igor Prusov Date: Fri, 2 Feb 2024 17:25:48 +0300 Subject: [PATCH 0999/2290] clk: meson: Add missing clocks to axg_clk_regmaps [ Upstream commit ba535bce57e71463a86f8b33a0ea88c26e3a6418 ] Some clocks were missing from axg_clk_regmaps, which caused kernel panic during cat /sys/kernel/debug/clk/clk_summary [ 57.349402] Unable to handle kernel NULL pointer dereference at virtual address 00000000000001fc ... [ 57.430002] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 57.436900] pc : regmap_read+0x1c/0x88 [ 57.440608] lr : clk_regmap_gate_is_enabled+0x3c/0xb0 [ 57.445611] sp : ffff800082f1b690 [ 57.448888] x29: ffff800082f1b690 x28: 0000000000000000 x27: ffff800080eb9a70 [ 57.455961] x26: 0000000000000007 x25: 0000000000000016 x24: 0000000000000000 [ 57.463033] x23: ffff800080e8b488 x22: 0000000000000015 x21: ffff00000e7e7000 [ 57.470106] x20: ffff00000400ec00 x19: 0000000000000000 x18: ffffffffffffffff [ 57.477178] x17: 0000000000000000 x16: 0000000000000000 x15: ffff0000042a3000 [ 57.484251] x14: 0000000000000000 x13: ffff0000042a2fec x12: 0000000005f5e100 [ 57.491323] x11: abcc77118461cefd x10: 0000000000000020 x9 : ffff8000805e4b24 [ 57.498396] x8 : ffff0000028063c0 x7 : ffff800082f1b710 x6 : ffff800082f1b710 [ 57.505468] x5 : 00000000ffffffd0 x4 : ffff800082f1b6e0 x3 : 0000000000001000 [ 57.512541] x2 : ffff800082f1b6e4 x1 : 000000000000012c x0 : 0000000000000000 [ 57.519615] Call trace: [ 57.522030] regmap_read+0x1c/0x88 [ 57.525393] clk_regmap_gate_is_enabled+0x3c/0xb0 [ 57.530050] clk_core_is_enabled+0x44/0x120 [ 57.534190] clk_summary_show_subtree+0x154/0x2f0 [ 57.538847] clk_summary_show_subtree+0x220/0x2f0 [ 57.543505] clk_summary_show_subtree+0x220/0x2f0 [ 57.548162] clk_summary_show_subtree+0x220/0x2f0 [ 57.552820] clk_summary_show_subtree+0x220/0x2f0 [ 57.557477] clk_summary_show_subtree+0x220/0x2f0 [ 57.562135] clk_summary_show_subtree+0x220/0x2f0 [ 57.566792] clk_summary_show_subtree+0x220/0x2f0 [ 57.571450] clk_summary_show+0x84/0xb8 [ 57.575245] seq_read_iter+0x1bc/0x4b8 [ 57.578954] seq_read+0x8c/0xd0 [ 57.582059] full_proxy_read+0x68/0xc8 [ 57.585767] vfs_read+0xb0/0x268 [ 57.588959] ksys_read+0x70/0x108 [ 57.592236] __arm64_sys_read+0x24/0x38 [ 57.596031] invoke_syscall+0x50/0x128 [ 57.599740] el0_svc_common.constprop.0+0x48/0xf8 [ 57.604397] do_el0_svc+0x28/0x40 [ 57.607675] el0_svc+0x34/0xb8 [ 57.610694] el0t_64_sync_handler+0x13c/0x158 [ 57.615006] el0t_64_sync+0x190/0x198 [ 57.618635] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (b941fc00) [ 57.624668] ---[ end trace 0000000000000000 ]--- [jbrunet: add missing Fixes tag] Signed-off-by: Igor Prusov Link: https://lore.kernel.org/r/20240202172537.1.I64656c75d84284bc91e6126b50b33c502be7c42a@changeid Fixes: 14ebb3154b8f ("clk: meson: axg: add Video Clocks") Signed-off-by: Jerome Brunet Signed-off-by: Sasha Levin --- drivers/clk/meson/axg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index 2ad3801398dc1..7802dabb26f6d 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -2144,7 +2144,9 @@ static struct clk_regmap *const axg_clk_regmaps[] = { &axg_vclk_input, &axg_vclk2_input, &axg_vclk_div, + &axg_vclk_div1, &axg_vclk2_div, + &axg_vclk2_div1, &axg_vclk_div2_en, &axg_vclk_div4_en, &axg_vclk_div6_en, -- GitLab From 3828fc3ab84cea74dd41a143456b0238ba2b56c5 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 12 Jan 2024 05:42:26 -0800 Subject: [PATCH 1000/2290] media: em28xx: annotate unchecked call to media_device_register() [ Upstream commit fd61d77a3d28444b2635f0c8b5a2ecd6a4d94026 ] Static analyzers generate alerts for an unchecked call to `media_device_register()`. However, in this case, the device will work reliably without the media controller API. Add a comment above the call to prevent future unnecessary changes. Suggested-by: Mauro Carvalho Chehab Fixes: 37ecc7b1278f ("[media] em28xx: add media controller support") Signed-off-by: Nikita Zhandarovich Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/em28xx/em28xx-cards.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 4d037c92af7c5..bae76023cf71d 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -4094,6 +4094,10 @@ static int em28xx_usb_probe(struct usb_interface *intf, * topology will likely change after the load of the em28xx subdrivers. */ #ifdef CONFIG_MEDIA_CONTROLLER + /* + * No need to check the return value, the device will still be + * usable without media controller API. + */ retval = media_device_register(dev->media_dev); #endif -- GitLab From 6bf5c2fade8ed53b2d26fa9875e5b04f36c7145d Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:47:53 +0800 Subject: [PATCH 1001/2290] media: v4l2-tpg: fix some memleaks in tpg_alloc [ Upstream commit 8cf9c5051076e0eb958f4361d50d8b0c3ee6691c ] In tpg_alloc, resources should be deallocated in each and every error-handling paths, since they are allocated in for statements. Otherwise there would be memleaks because tpg_free is called only when tpg_alloc return 0. Fixes: 63881df94d3e ("[media] vivid: add the Test Pattern Generator") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 52 +++++++++++++++---- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index 303d02b1d71c9..fe30f5b0050dd 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -113,6 +113,7 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) { unsigned pat; unsigned plane; + int ret = 0; tpg->max_line_width = max_w; for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) { @@ -121,14 +122,18 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) tpg->lines[pat][plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->lines[pat][plane]) - return -ENOMEM; + if (!tpg->lines[pat][plane]) { + ret = -ENOMEM; + goto free_lines; + } if (plane == 0) continue; tpg->downsampled_lines[pat][plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->downsampled_lines[pat][plane]) - return -ENOMEM; + if (!tpg->downsampled_lines[pat][plane]) { + ret = -ENOMEM; + goto free_lines; + } } } for (plane = 0; plane < TPG_MAX_PLANES; plane++) { @@ -136,18 +141,45 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w) tpg->contrast_line[plane] = vzalloc(array_size(pixelsz, max_w)); - if (!tpg->contrast_line[plane]) - return -ENOMEM; + if (!tpg->contrast_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } tpg->black_line[plane] = vzalloc(array_size(pixelsz, max_w)); - if (!tpg->black_line[plane]) - return -ENOMEM; + if (!tpg->black_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } tpg->random_line[plane] = vzalloc(array3_size(max_w, 2, pixelsz)); - if (!tpg->random_line[plane]) - return -ENOMEM; + if (!tpg->random_line[plane]) { + ret = -ENOMEM; + goto free_contrast_line; + } } return 0; + +free_contrast_line: + for (plane = 0; plane < TPG_MAX_PLANES; plane++) { + vfree(tpg->contrast_line[plane]); + vfree(tpg->black_line[plane]); + vfree(tpg->random_line[plane]); + tpg->contrast_line[plane] = NULL; + tpg->black_line[plane] = NULL; + tpg->random_line[plane] = NULL; + } +free_lines: + for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) + for (plane = 0; plane < TPG_MAX_PLANES; plane++) { + vfree(tpg->lines[pat][plane]); + tpg->lines[pat][plane] = NULL; + if (plane == 0) + continue; + vfree(tpg->downsampled_lines[pat][plane]); + tpg->downsampled_lines[pat][plane] = NULL; + } + return ret; } EXPORT_SYMBOL_GPL(tpg_alloc); -- GitLab From 0c9550b032de48d6a7fa6a4ddc09699d64d9300d Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:48:44 +0800 Subject: [PATCH 1002/2290] media: v4l2-mem2mem: fix a memleak in v4l2_m2m_register_entity [ Upstream commit 8f94b49a5b5d386c038e355bef6347298aabd211 ] The entity->name (i.e. name) is allocated in v4l2_m2m_register_entity but isn't freed in its following error-handling paths. This patch adds such deallocation to prevent memleak of entity->name. Fixes: be2fff656322 ("media: add helpers for memory-to-memory media controller") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/v4l2-core/v4l2-mem2mem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index be7fde1ed3eaa..97645d6509e1c 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -1084,11 +1084,17 @@ static int v4l2_m2m_register_entity(struct media_device *mdev, entity->function = function; ret = media_entity_pads_init(entity, num_pads, pads); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } ret = media_device_register_entity(mdev, entity); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } return 0; } -- GitLab From 437a111f79a2f5b2a5f21e27fdec6f40c8768712 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Sat, 3 Feb 2024 14:40:43 +0100 Subject: [PATCH 1003/2290] media: edia: dvbdev: fix a use-after-free [ Upstream commit 8c64f4cdf4e6cc5682c52523713af8c39c94e6d5 ] In dvb_register_device, *pdvbdev is set equal to dvbdev, which is freed in several error-handling paths. However, *pdvbdev is not set to NULL after dvbdev's deallocation, causing use-after-frees in many places, for example, in the following call chain: budget_register |-> dvb_dmxdev_init |-> dvb_register_device |-> dvb_dmxdev_release |-> dvb_unregister_device |-> dvb_remove_device |-> dvb_device_put |-> kref_put When calling dvb_unregister_device, dmxdev->dvbdev (i.e. *pdvbdev in dvb_register_device) could point to memory that had been freed in dvb_register_device. Thereafter, this pointer is transferred to kref_put and triggering a use-after-free. Link: https://lore.kernel.org/linux-media/20240203134046.3120099-1-alexious@zju.edu.cn Fixes: b61901024776 ("V4L/DVB (5244): Dvbdev: fix illegal re-usage of fileoperations struct") Signed-off-by: Zhipeng Lu Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-core/dvbdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index d352e028491aa..aefee2277254d 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -494,6 +494,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); if (!dvbdevfops) { kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } @@ -502,6 +503,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, if (!new_node) { kfree(dvbdevfops); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } @@ -535,6 +537,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); return -EINVAL; @@ -557,6 +560,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return ret; } @@ -575,6 +579,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); kfree(dvbdev); + *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } -- GitLab From f69b926799b3d0a01376f77c86f3697462c913a2 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 Jan 2024 15:19:07 +0800 Subject: [PATCH 1004/2290] pinctrl: mediatek: Drop bogus slew rate register range for MT8186 [ Upstream commit 3a29c87548809405bcbc66acc69cbe6f15184d94 ] The MT8186 does not support configuring pin slew rate. This is evident from both the datasheet, and the fact that the driver points the slew rate register range at the GPIO direction register range. Drop the bogus setting. Fixes: 8b483bda1e46 ("pinctrl: add pinctrl driver on mt8186") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240131071910.3950450-1-wenst@chromium.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-mt8186.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8186.c b/drivers/pinctrl/mediatek/pinctrl-mt8186.c index a02f7c3269707..09edcf47effec 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8186.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8186.c @@ -1198,7 +1198,6 @@ static const struct mtk_pin_reg_calc mt8186_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8186_pin_dir_range), [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8186_pin_di_range), [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8186_pin_do_range), - [PINCTRL_PIN_REG_SR] = MTK_RANGE(mt8186_pin_dir_range), [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8186_pin_smt_range), [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8186_pin_ies_range), [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8186_pin_pu_range), -- GitLab From 99cd54167d0b3c75e3957b45ecc1a584548a219e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 Jan 2024 15:19:08 +0800 Subject: [PATCH 1005/2290] pinctrl: mediatek: Drop bogus slew rate register range for MT8192 [ Upstream commit e15ab05a6b3ed42f2f43f8bd1a1abdbde64afecd ] The MT8192 does not support configuring pin slew rate. This is evident from both the datasheet, and the fact that the driver points the slew rate register range at the GPIO direction register range. Drop the bogus setting. Fixes: d32f38f2a8fc ("pinctrl: mediatek: Add pinctrl driver for mt8192") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240131071910.3950450-2-wenst@chromium.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-mt8192.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8192.c b/drivers/pinctrl/mediatek/pinctrl-mt8192.c index 9695f4ec6aba9..f120268c00f56 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8192.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8192.c @@ -1379,7 +1379,6 @@ static const struct mtk_pin_reg_calc mt8192_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8192_pin_dir_range), [PINCTRL_PIN_REG_DI] = MTK_RANGE(mt8192_pin_di_range), [PINCTRL_PIN_REG_DO] = MTK_RANGE(mt8192_pin_do_range), - [PINCTRL_PIN_REG_SR] = MTK_RANGE(mt8192_pin_dir_range), [PINCTRL_PIN_REG_SMT] = MTK_RANGE(mt8192_pin_smt_range), [PINCTRL_PIN_REG_IES] = MTK_RANGE(mt8192_pin_ies_range), [PINCTRL_PIN_REG_PU] = MTK_RANGE(mt8192_pin_pu_range), -- GitLab From 1f3e9910422eaeb7b829807ad73143d58b557e0e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 6 Feb 2024 19:43:35 +0100 Subject: [PATCH 1006/2290] clk: qcom: reset: Commonize the de/assert functions [ Upstream commit eda40d9c583e95e0b6ac69d2950eec10f802e0e8 ] They do the same thing, except the last argument of the last function call differs. Commonize them. Reviewed-by: Bryan O'Donoghue Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240105-topic-venus_reset-v2-2-c37eba13b5ce@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 2f8cf2c3f3e3 ("clk: qcom: reset: Ensure write completion on reset de/assertion") Signed-off-by: Sasha Levin --- drivers/clk/qcom/reset.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index e45e32804d2c7..20d1d35aaf229 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -22,8 +22,8 @@ static int qcom_reset(struct reset_controller_dev *rcdev, unsigned long id) return 0; } -static int -qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) +static int qcom_reset_set_assert(struct reset_controller_dev *rcdev, + unsigned long id, bool assert) { struct qcom_reset_controller *rst; const struct qcom_reset_map *map; @@ -33,21 +33,17 @@ qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) map = &rst->reset_map[id]; mask = map->bitmask ? map->bitmask : BIT(map->bit); - return regmap_update_bits(rst->regmap, map->reg, mask, mask); + return regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); } -static int -qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) +static int qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) { - struct qcom_reset_controller *rst; - const struct qcom_reset_map *map; - u32 mask; - - rst = to_qcom_reset_controller(rcdev); - map = &rst->reset_map[id]; - mask = map->bitmask ? map->bitmask : BIT(map->bit); + return qcom_reset_set_assert(rcdev, id, true); +} - return regmap_update_bits(rst->regmap, map->reg, mask, 0); +static int qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) +{ + return qcom_reset_set_assert(rcdev, id, false); } const struct reset_control_ops qcom_reset_ops = { -- GitLab From 6814dc592d35a9484c3ac83c0a3b7ac07ed0f99b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 6 Feb 2024 19:43:36 +0100 Subject: [PATCH 1007/2290] clk: qcom: reset: Ensure write completion on reset de/assertion [ Upstream commit 2f8cf2c3f3e3f7ef61bd19abb4b0bb797ad50aaf ] Trying to toggle the resets in a rapid fashion can lead to the changes not actually arriving at the clock controller block when we expect them to. This was observed at least on SM8250. Read back the value after regmap_update_bits to ensure write completion. Fixes: b36ba30c8ac6 ("clk: qcom: Add reset controller support") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240105-topic-venus_reset-v2-3-c37eba13b5ce@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/reset.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index 20d1d35aaf229..d96c96a9089f4 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -33,7 +33,12 @@ static int qcom_reset_set_assert(struct reset_controller_dev *rcdev, map = &rst->reset_map[id]; mask = map->bitmask ? map->bitmask : BIT(map->bit); - return regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); + regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0); + + /* Read back the register to ensure write completion, ignore the value */ + regmap_read(rst->regmap, map->reg, &mask); + + return 0; } static int qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) -- GitLab From fd14781b30215672c09826f666ed65339885a358 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 30 Jun 2023 19:08:22 +0800 Subject: [PATCH 1008/2290] quota: simplify drop_dquot_ref() [ Upstream commit 7bce48f0fec602b3b6c335963b26d9eefa417788 ] As Honza said, remove_inode_dquot_ref() currently does not release the last dquot reference but instead adds the dquot to tofree_head list. This is because dqput() can sleep while dropping of the last dquot reference (writing back the dquot and calling ->release_dquot()) and that must not happen under dq_list_lock. Now that dqput() queues the final dquot cleanup into a workqueue, remove_inode_dquot_ref() can call dqput() unconditionally and we can significantly simplify it. Here we open code the simplified code of remove_inode_dquot_ref() into remove_dquot_ref() and remove the function put_dquot_list() which is no longer used. Signed-off-by: Baokun Li Signed-off-by: Jan Kara Message-Id: <20230630110822.3881712-6-libaokun1@huawei.com> Stable-dep-of: 179b8c97ebf6 ("quota: Fix rcu annotations of inode dquot pointers") Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 70 +++++++----------------------------------------- 1 file changed, 9 insertions(+), 61 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index b0cf3869d3bf5..730d8ffc4928a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1084,59 +1084,7 @@ out: return err; } -/* - * Remove references to dquots from inode and add dquot to list for freeing - * if we have the last reference to dquot - */ -static void remove_inode_dquot_ref(struct inode *inode, int type, - struct list_head *tofree_head) -{ - struct dquot **dquots = i_dquot(inode); - struct dquot *dquot = dquots[type]; - - if (!dquot) - return; - - dquots[type] = NULL; - if (list_empty(&dquot->dq_free)) { - /* - * The inode still has reference to dquot so it can't be in the - * free list - */ - spin_lock(&dq_list_lock); - list_add(&dquot->dq_free, tofree_head); - spin_unlock(&dq_list_lock); - } else { - /* - * Dquot is already in a list to put so we won't drop the last - * reference here. - */ - dqput(dquot); - } -} - -/* - * Free list of dquots - * Dquots are removed from inodes and no new references can be got so we are - * the only ones holding reference - */ -static void put_dquot_list(struct list_head *tofree_head) -{ - struct list_head *act_head; - struct dquot *dquot; - - act_head = tofree_head->next; - while (act_head != tofree_head) { - dquot = list_entry(act_head, struct dquot, dq_free); - act_head = act_head->next; - /* Remove dquot from the list so we won't have problems... */ - list_del_init(&dquot->dq_free); - dqput(dquot); - } -} - -static void remove_dquot_ref(struct super_block *sb, int type, - struct list_head *tofree_head) +static void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; #ifdef CONFIG_QUOTA_DEBUG @@ -1153,11 +1101,16 @@ static void remove_dquot_ref(struct super_block *sb, int type, */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { + struct dquot **dquots = i_dquot(inode); + struct dquot *dquot = dquots[type]; + #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif - remove_inode_dquot_ref(inode, type, tofree_head); + dquots[type] = NULL; + if (dquot) + dqput(dquot); } spin_unlock(&dq_data_lock); } @@ -1174,13 +1127,8 @@ static void remove_dquot_ref(struct super_block *sb, int type, /* Gather all references from inodes and drop them */ static void drop_dquot_ref(struct super_block *sb, int type) { - LIST_HEAD(tofree_head); - - if (sb->dq_op) { - remove_dquot_ref(sb, type, &tofree_head); - synchronize_srcu(&dquot_srcu); - put_dquot_list(&tofree_head); - } + if (sb->dq_op) + remove_dquot_ref(sb, type); } static inline -- GitLab From 7f9e833fc0f9b47be503af012eb5903086939754 Mon Sep 17 00:00:00 2001 From: Wang Jianjian Date: Fri, 2 Feb 2024 16:18:52 +0800 Subject: [PATCH 1009/2290] quota: Fix potential NULL pointer dereference [ Upstream commit d0aa72604fbd80c8aabb46eda00535ed35570f1f ] Below race may cause NULL pointer dereference P1 P2 dquot_free_inode quota_off drop_dquot_ref remove_dquot_ref dquots = i_dquot(inode) dquots = i_dquot(inode) srcu_read_lock dquots[cnt]) != NULL (1) dquots[type] = NULL (2) spin_lock(&dquots[cnt]->dq_dqb_lock) (3) .... If dquot_free_inode(or other routines) checks inode's quota pointers (1) before quota_off sets it to NULL(2) and use it (3) after that, NULL pointer dereference will be triggered. So let's fix it by using a temporary pointer to avoid this issue. Signed-off-by: Wang Jianjian Signed-off-by: Jan Kara Message-Id: <20240202081852.2514092-1-wangjianjian3@huawei.com> Stable-dep-of: 179b8c97ebf6 ("quota: Fix rcu annotations of inode dquot pointers") Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 98 ++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 730d8ffc4928a..44c4da364c994 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -399,15 +399,17 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ -static inline int mark_all_dquot_dirty(struct dquot * const *dquot) +static inline int mark_all_dquot_dirty(struct dquot * const *dquots) { int ret, err, cnt; + struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) /* Even in case of error we have to continue */ - ret = mark_dquot_dirty(dquot[cnt]); + ret = mark_dquot_dirty(dquot); if (!err) err = ret; } @@ -1684,6 +1686,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; struct dquot **dquots; + struct dquot *dquot; if (!inode_quota_active(inode)) { if (reserve) { @@ -1703,27 +1706,26 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; if (reserve) { - ret = dquot_add_space(dquots[cnt], 0, number, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { - ret = dquot_add_space(dquots[cnt], number, 0, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); if (reserve) - dquot_free_reserved_space(dquots[cnt], - number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; @@ -1754,6 +1756,7 @@ int dquot_alloc_inode(struct inode *inode) int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots; + struct dquot *dquot; if (!inode_quota_active(inode)) return 0; @@ -1764,17 +1767,19 @@ int dquot_alloc_inode(struct inode *inode) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]); + ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; /* Back out changes we already did */ - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } @@ -1796,6 +1801,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot **dquots; + struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { @@ -1811,9 +1817,8 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; @@ -1838,6 +1843,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot **dquots; + struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { @@ -1853,9 +1859,8 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; @@ -1882,6 +1887,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots; + struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { @@ -1902,17 +1908,18 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_bdq_free(dquots[cnt], number); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); + prepare_warning(&warn[cnt], dquot, wtype); if (reserve) - dquot_free_reserved_space(dquots[cnt], number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; @@ -1937,6 +1944,7 @@ void dquot_free_inode(struct inode *inode) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots; + struct dquot *dquot; int index; if (!inode_quota_active(inode)) @@ -1947,16 +1955,16 @@ void dquot_free_inode(struct inode *inode) spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; - warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_idq_free(dquots[cnt], 1); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + prepare_warning(&warn[cnt], dquot, wtype); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); @@ -1983,7 +1991,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot *transfer_from[MAXQUOTAS] = {}; - int cnt, ret = 0; + int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; @@ -2072,8 +2080,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); + /* + * These arrays are local and we hold dquot references so we don't need + * the srcu protection but still take dquot_srcu to avoid warning in + * mark_all_dquot_dirty(). + */ + index = srcu_read_lock(&dquot_srcu); mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); + srcu_read_unlock(&dquot_srcu, index); + flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); -- GitLab From c12efda47df9ef2680c1fd6dbbad5c022521c31b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Feb 2024 15:32:09 +0100 Subject: [PATCH 1010/2290] quota: Fix rcu annotations of inode dquot pointers [ Upstream commit 179b8c97ebf63429589f5afeba59a181fe70603e ] Dquot pointers in i_dquot array in the inode are protected by dquot_srcu. Annotate the array pointers with __rcu, perform the locked dereferences with srcu_dereference_check() instead of plain reads, and set the array elements with rcu_assign_pointer(). Fixes: b9ba6f94b238 ("quota: remove dqptr_sem") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402061900.rTuYDlo6-lkp@intel.com/ Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 66 ++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 44c4da364c994..b67557647d61f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -399,7 +399,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ -static inline int mark_all_dquot_dirty(struct dquot * const *dquots) +static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots) { int ret, err, cnt; struct dquot *dquot; @@ -1006,14 +1006,15 @@ out: } EXPORT_SYMBOL(dqget); -static inline struct dquot **i_dquot(struct inode *inode) +static inline struct dquot __rcu **i_dquot(struct inode *inode) { - return inode->i_sb->s_op->get_dquots(inode); + /* Force __rcu for now until filesystems are fixed */ + return (struct dquot __rcu **)inode->i_sb->s_op->get_dquots(inode); } static int dqinit_needed(struct inode *inode, int type) { - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1103,14 +1104,16 @@ static void remove_dquot_ref(struct super_block *sb, int type) */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { - struct dquot **dquots = i_dquot(inode); - struct dquot *dquot = dquots[type]; + struct dquot __rcu **dquots = i_dquot(inode); + struct dquot *dquot = srcu_dereference_check( + dquots[type], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif - dquots[type] = NULL; + rcu_assign_pointer(dquots[type], NULL); if (dquot) dqput(dquot); } @@ -1463,7 +1466,8 @@ static int inode_quota_active(const struct inode *inode) static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; - struct dquot **dquots, *got[MAXQUOTAS] = {}; + struct dquot __rcu **dquots; + struct dquot *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; @@ -1538,7 +1542,7 @@ static int __dquot_initialize(struct inode *inode, int type) if (!got[cnt]) continue; if (!dquots[cnt]) { - dquots[cnt] = got[cnt]; + rcu_assign_pointer(dquots[cnt], got[cnt]); got[cnt] = NULL; /* * Make quota reservation system happy if someone @@ -1546,12 +1550,16 @@ static int __dquot_initialize(struct inode *inode, int type) */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) { + struct dquot *dquot = srcu_dereference_check( + dquots[cnt], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + spin_lock(&inode->i_lock); /* Get reservation again under proper lock */ rsv = __inode_get_rsv_space(inode); - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquots[cnt]->dq_dqb.dqb_rsvspace += rsv; - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot->dq_dqb.dqb_rsvspace += rsv; + spin_unlock(&dquot->dq_dqb_lock); spin_unlock(&inode->i_lock); } } @@ -1573,7 +1581,7 @@ EXPORT_SYMBOL(dquot_initialize); bool dquot_initialize_needed(struct inode *inode) { - struct dquot **dquots; + struct dquot __rcu **dquots; int i; if (!inode_quota_active(inode)) @@ -1598,13 +1606,14 @@ EXPORT_SYMBOL(dquot_initialize_needed); static void __dquot_drop(struct inode *inode) { int cnt; - struct dquot **dquots = i_dquot(inode); + struct dquot __rcu **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - put[cnt] = dquots[cnt]; - dquots[cnt] = NULL; + put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, + lockdep_is_held(&dq_data_lock)); + rcu_assign_pointer(dquots[cnt], NULL); } spin_unlock(&dq_data_lock); dqput_all(put); @@ -1612,7 +1621,7 @@ static void __dquot_drop(struct inode *inode) void dquot_drop(struct inode *inode) { - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1685,7 +1694,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; if (!inode_quota_active(inode)) { @@ -1755,7 +1764,7 @@ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; struct dquot *dquot; if (!inode_quota_active(inode)) @@ -1800,7 +1809,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; @@ -1842,7 +1851,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; @@ -1886,7 +1895,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots; + struct dquot __rcu **dquots; struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; @@ -1943,7 +1952,7 @@ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots; + struct dquot __rcu * const *dquots; struct dquot *dquot; int index; @@ -1990,6 +1999,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; + struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; @@ -2022,6 +2032,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); + dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. @@ -2036,7 +2047,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; - transfer_from[cnt] = i_dquot(inode)[cnt]; + transfer_from[cnt] = srcu_dereference_check(dquots[cnt], + &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) @@ -2075,7 +2087,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } - i_dquot(inode)[cnt] = transfer_to[cnt]; + rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); @@ -2086,8 +2098,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) * mark_all_dquot_dirty(). */ index = srcu_read_lock(&dquot_srcu); - mark_all_dquot_dirty(transfer_from); - mark_all_dquot_dirty(transfer_to); + mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); + mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn_to); -- GitLab From 4421c746023bd368e17558eb9512ccfa6e82712c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Dec 2023 15:30:01 +0100 Subject: [PATCH 1011/2290] PCI: switchtec: Fix an error handling path in switchtec_pci_probe() [ Upstream commit dec529b0b0572b32f9eb91c882dd1f08ca657efb ] The commit in Fixes changed the logic on how resources are released and introduced a new switchtec_exit_pci() that need to be called explicitly in order to undo a corresponding switchtec_init_pci(). This was done in the remove function, but not in the probe. Fix the probe now. Fixes: df25461119d9 ("PCI: switchtec: Fix stdev_release() crash after surprise hot remove") Link: https://lore.kernel.org/r/01446d2ccb91a578239915812f2b7dfbeb2882af.1703428183.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 3f3320d0a4f8f..d05a482639e3c 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1674,7 +1674,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev, rc = switchtec_init_isr(stdev); if (rc) { dev_err(&stdev->dev, "failed to init isr.\n"); - goto err_put; + goto err_exit_pci; } iowrite32(SWITCHTEC_EVENT_CLEAR | @@ -1695,6 +1695,8 @@ static int switchtec_pci_probe(struct pci_dev *pdev, err_devadd: stdev_kill(stdev); +err_exit_pci: + switchtec_exit_pci(stdev); err_put: ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt)); put_device(&stdev->dev); -- GitLab From a71f66bd5f7b9b35a8aaa49e29565eca66299399 Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Sun, 28 Jan 2024 12:29:06 +0800 Subject: [PATCH 1012/2290] crypto: xilinx - call finalize with bh disabled [ Upstream commit a853450bf4c752e664abab0b2fad395b7ad7701c ] When calling crypto_finalize_request, BH should be disabled to avoid triggering the following calltrace: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 74 at crypto/crypto_engine.c:58 crypto_finalize_request+0xa0/0x118 Modules linked in: cryptodev(O) CPU: 2 PID: 74 Comm: firmware:zynqmp Tainted: G O 6.8.0-rc1-yocto-standard #323 Hardware name: ZynqMP ZCU102 Rev1.0 (DT) pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : crypto_finalize_request+0xa0/0x118 lr : crypto_finalize_request+0x104/0x118 sp : ffffffc085353ce0 x29: ffffffc085353ce0 x28: 0000000000000000 x27: ffffff8808ea8688 x26: ffffffc081715038 x25: 0000000000000000 x24: ffffff880100db00 x23: ffffff880100da80 x22: 0000000000000000 x21: 0000000000000000 x20: ffffff8805b14000 x19: ffffff880100da80 x18: 0000000000010450 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000003 x13: 0000000000000000 x12: ffffff880100dad0 x11: 0000000000000000 x10: ffffffc0832dcd08 x9 : ffffffc0812416d8 x8 : 00000000000001f4 x7 : ffffffc0830d2830 x6 : 0000000000000001 x5 : ffffffc082091000 x4 : ffffffc082091658 x3 : 0000000000000000 x2 : ffffffc7f9653000 x1 : 0000000000000000 x0 : ffffff8802d20000 Call trace: crypto_finalize_request+0xa0/0x118 crypto_finalize_aead_request+0x18/0x30 zynqmp_handle_aes_req+0xcc/0x388 crypto_pump_work+0x168/0x2d8 kthread_worker_fn+0xfc/0x3a0 kthread+0x118/0x138 ret_from_fork+0x10/0x20 irq event stamp: 40 hardirqs last enabled at (39): [] _raw_spin_unlock_irqrestore+0x70/0xb0 hardirqs last disabled at (40): [] el1_dbg+0x28/0x90 softirqs last enabled at (36): [] kernel_neon_begin+0x8c/0xf0 softirqs last disabled at (34): [] kernel_neon_begin+0x60/0xf0 ---[ end trace 0000000000000000 ]--- Fixes: 4d96f7d48131 ("crypto: xilinx - Add Xilinx AES driver") Signed-off-by: Quanyang Wang Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/xilinx/zynqmp-aes-gcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c index bf1f421e05f25..74bd3eb63734d 100644 --- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine, err = zynqmp_aes_aead_cipher(areq); } + local_bh_disable(); crypto_finalize_aead_request(engine, areq, err); + local_bh_enable(); + return 0; } -- GitLab From dc84f8c1a3dbea83a5be0bb9f25724b903286eef Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 6 Feb 2024 08:32:28 +0000 Subject: [PATCH 1013/2290] perf thread_map: Free strlist on normal path in thread_map__new_by_tid_str() [ Upstream commit 1eb3d924e3c0b8c27388b0583a989d757866efb6 ] slist needs to be freed in both error path and normal path in thread_map__new_by_tid_str(). Fixes: b52956c961be3a04 ("perf tools: Allow multiple threads or processes in record, stat, top") Reviewed-by: Arnaldo Carvalho de Melo Signed-off-by: Yang Jihong Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240206083228.172607-6-yangjihong1@huawei.com Signed-off-by: Sasha Levin --- tools/perf/util/thread_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943b..cee7fc3b5bb0c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -279,13 +279,13 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str) threads->nr = ntasks; } out: + strlist__delete(slist); if (threads) refcount_set(&threads->refcnt, 1); return threads; out_free_threads: zfree(&threads); - strlist__delete(slist); goto out; } -- GitLab From e4892870a5f55d1a06aa1c3d87e58875f4945fdb Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 31 Jan 2024 16:47:36 -0800 Subject: [PATCH 1014/2290] drm/msm/dpu: fix the programming of INTF_CFG2_DATA_HCTL_EN [ Upstream commit 2f4a67a3894e15c135125cb54edc5b43abc1b70e ] Currently INTF_CFG2_DATA_HCTL_EN is coupled with the enablement of widebus but this is incorrect because we should be enabling this bit independent of widebus except for cases where compression is enabled in one pixel per clock mode. Fix this by making the condition checks more explicit and enabling INTF_CFG2_DATA_HCTL_EN for all other cases when supported by DPU. Fixes: 3309a7563971 ("drm/msm/dpu: revise timing engine programming to support widebus feature") Suggested-by: Dmitry Baryshkov Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/576722/ Link: https://lore.kernel.org/r/20240201004737.2478-1-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 7 +++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 7 +++++++ .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c | 15 +++++++++------ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 25245ef386db6..4bdde5cb23aae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -228,6 +228,13 @@ bool dpu_encoder_is_widebus_enabled(const struct drm_encoder *drm_enc) return dpu_enc->wide_bus_en; } +bool dpu_encoder_is_dsc_enabled(const struct drm_encoder *drm_enc) +{ + const struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); + + return dpu_enc->dsc ? true : false; +} + int dpu_encoder_get_crc_values_cnt(const struct drm_encoder *drm_enc) { struct dpu_encoder_virt *dpu_enc; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h index 9e7236ef34e6d..a71efa2b9e508 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h @@ -175,6 +175,13 @@ int dpu_encoder_get_vsync_count(struct drm_encoder *drm_enc); bool dpu_encoder_is_widebus_enabled(const struct drm_encoder *drm_enc); +/** + * dpu_encoder_is_dsc_enabled - indicate whether dsc is enabled + * for the encoder. + * @drm_enc: Pointer to previously created drm encoder structure + */ +bool dpu_encoder_is_dsc_enabled(const struct drm_encoder *drm_enc); + /** * dpu_encoder_get_crc_values_cnt - get number of physical encoders contained * in virtual encoder that can collect CRC values diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 2c14646661b77..09aeec00bf5e2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -100,6 +100,7 @@ static void drm_mode_to_intf_timing_params( } timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); /* * for DP, divide the horizonal parameters by 2 when diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index 384558d2f9602..1debac4fcc3eb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -154,13 +154,8 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, hsync_ctl = (hsync_period << 16) | p->hsync_pulse_width; display_hctl = (hsync_end_x << 16) | hsync_start_x; - /* - * DATA_HCTL_EN controls data timing which can be different from - * video timing. It is recommended to enable it for all cases, except - * if compression is enabled in 1 pixel per clock mode - */ if (p->wide_bus_en) - intf_cfg2 |= INTF_CFG2_DATABUS_WIDEN | INTF_CFG2_DATA_HCTL_EN; + intf_cfg2 |= INTF_CFG2_DATABUS_WIDEN; data_width = p->width; @@ -230,6 +225,14 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, DPU_REG_WRITE(c, INTF_CONFIG, intf_cfg); DPU_REG_WRITE(c, INTF_PANEL_FORMAT, panel_format); if (ctx->cap->features & BIT(DPU_DATA_HCTL_EN)) { + /* + * DATA_HCTL_EN controls data timing which can be different from + * video timing. It is recommended to enable it for all cases, except + * if compression is enabled in 1 pixel per clock mode + */ + if (!(p->compression_en && !p->wide_bus_en)) + intf_cfg2 |= INTF_CFG2_DATA_HCTL_EN; + DPU_REG_WRITE(c, INTF_CONFIG2, intf_cfg2); DPU_REG_WRITE(c, INTF_DISPLAY_DATA_HCTL, display_data_hctl); DPU_REG_WRITE(c, INTF_ACTIVE_DATA_HCTL, active_data_hctl); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index e75339b96a1d2..7f502c8bee1d4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -33,6 +33,7 @@ struct intf_timing_params { u32 hsync_skew; bool wide_bus_en; + bool compression_en; }; struct intf_prog_fetch { -- GitLab From c264af81ad60aa4129ad2158c0d2a6c0a252fdd4 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 4 Feb 2024 18:45:27 +0100 Subject: [PATCH 1015/2290] drm/msm/dpu: Only enable DSC_MODE_MULTIPLEX if dsc_merge is enabled [ Upstream commit 06267d22f9ee6fd34150b6dcdb2fa6983e1a85bc ] When the topology calls for two interfaces on the current fixed topology of 2 DSC blocks, or uses 1 DSC block for a single interface (e.g. SC7280 with only one DSC block), there should be no merging of DSC output. This is already represented by the return value of dpu_encoder_use_dsc_merge(), but not yet used to correctly configure this flag. Fixes: 58dca9810749 ("drm/msm/disp/dpu1: Add support for DSC in encoder") Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/577067/ Link: https://lore.kernel.org/r/20240204-dpu-dsc-multiplex-v1-1-080963233c52@somainline.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 4bdde5cb23aae..3632f0768aa9e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1871,7 +1871,9 @@ static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc, dsc_common_mode = 0; pic_width = dsc->pic_width; - dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL; + dsc_common_mode = DSC_MODE_SPLIT_PANEL; + if (dpu_encoder_use_dsc_merge(enc_master->parent)) + dsc_common_mode |= DSC_MODE_MULTIPLEX; if (enc_master->intf_mode == INTF_MODE_VIDEO) dsc_common_mode |= DSC_MODE_VIDEO; -- GitLab From 7d82efd8d3a955470a4f35376f51506dc22bd270 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 6 Feb 2024 08:48:14 -0800 Subject: [PATCH 1016/2290] drm/radeon/ni: Fix wrong firmware size logging in ni_init_microcode() [ Upstream commit c4891d979c7668b195a0a75787967ec95a24ecef ] Clean up a typo in pr_err() erroneously printing NI MC 'rdev->mc_fw->size' during SMC firmware load. Log 'rdev->smc_fw->size' instead. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 6596afd48af4 ("drm/radeon/kms: add dpm support for btc (v3)") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/ni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 927e5f42e97d0..3e48cbb522a1c 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -813,7 +813,7 @@ int ni_init_microcode(struct radeon_device *rdev) err = 0; } else if (rdev->smc_fw->size != smc_req_size) { pr_err("ni_mc: Bogus length %zu in firmware \"%s\"\n", - rdev->mc_fw->size, fw_name); + rdev->smc_fw->size, fw_name); err = -EINVAL; } } -- GitLab From ca9b82755a51832e249ea7abda34ea8f3933db56 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Nov 2022 16:11:33 +0100 Subject: [PATCH 1017/2290] clk: renesas: r8a779g0: Add CMT clocks [ Upstream commit 523ed9442b997c39220ee364b07a8773623e3a58 ] Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20221104151135.4706-2-wsa+renesas@sang-engineering.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: abb3fa662b8f ("clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks") Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index d5b325e3c5398..f89cda70f2cbb 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -169,6 +169,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("i2c4", 522, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c5", 523, R8A779G0_CLK_S0D6_PER), DEF_MOD("wdt1:wdt0", 907, R8A779G0_CLK_R), + DEF_MOD("cmt0", 910, R8A779G0_CLK_R), + DEF_MOD("cmt1", 911, R8A779G0_CLK_R), + DEF_MOD("cmt2", 912, R8A779G0_CLK_R), + DEF_MOD("cmt3", 913, R8A779G0_CLK_R), DEF_MOD("pfc0", 915, R8A779G0_CLK_CL16M), DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), -- GitLab From bf10ef659b21510f3a2b02289a441c1a533ea3b8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 2 Feb 2023 01:03:24 +0000 Subject: [PATCH 1018/2290] clk: renesas: r8a779g0: Add Audio clocks [ Upstream commit 8dffb520ace48bcb996db049540c78261730213c ] Add module clocks for the Audio (SSI/SSIU) blocks on the Renesas R-Car V4H (R8A779G0) SoC. Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/878rhganfo.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Geert Uytterhoeven Stable-dep-of: abb3fa662b8f ("clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks") Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index f89cda70f2cbb..d0e8deacdd0be 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -177,6 +177,8 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), + DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), }; /* -- GitLab From 4f6274bafde6180d7ec83e06041f039531454f58 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 9 Feb 2023 17:03:00 +0100 Subject: [PATCH 1019/2290] clk: renesas: r8a779g0: Add thermal clock [ Upstream commit 7502a04dae0e614bc14553e31461e50499bc67aa ] Add the module clock used by the Thermal Sensor/Chip Internal Voltage Monitor/Core Voltage Monitor (THS/CIVM/CVM) on the Renesas R-Car V4H (R8A779G0) SoC. Based on a large patch in the BSP by Kazuya Mizuguchi. Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/59461effd0d9f7a39e0c91352c87f2b7071b1891.1675958536.git.geert+renesas@glider.be Stable-dep-of: abb3fa662b8f ("clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks") Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index d0e8deacdd0be..aace98c0c4735 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -177,6 +177,7 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("tsc", 919, R8A779G0_CLK_CL16M), DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), }; -- GitLab From 128c04cc04726a168797af94f399bfb6c9ed285d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Jan 2024 16:43:26 +0100 Subject: [PATCH 1020/2290] clk: renesas: r8a779g0: Correct PFC/GPIO parent clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit abb3fa662b8f8eaed1590b0e7a4e19eda467cdd3 ] According to the R-Car V4H Series Hardware User’s Manual Rev.1.00, the parent clock of the Pin Function (PFC/GPIO) module clocks is the CP clock. Fix this by adding the missing CP clock, and correcting the PFC parents. Fixes: f2afa78d5a0c0b0b ("dt-bindings: clock: Add r8a779g0 CPG Core Clock Definitions") Fixes: 36ff366033f0dde1 ("clk: renesas: r8a779g0: Add PFC/GPIO clocks") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/5401fccd204dc90b44f0013e7f53b9eff8df8214.1706197297.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 11 ++++++----- include/dt-bindings/clock/r8a779g0-cpg-mssr.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index aace98c0c4735..e4c616921e5ea 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -22,7 +22,7 @@ enum clk_ids { /* Core Clock Outputs exported to DT */ - LAST_DT_CORE_CLK = R8A779G0_CLK_R, + LAST_DT_CORE_CLK = R8A779G0_CLK_CP, /* External Input Clocks */ CLK_EXTAL, @@ -139,6 +139,7 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED("svd2_vip", R8A779G0_CLK_SVD2_VIP, CLK_SV_VIP, 2, 1), DEF_FIXED("cbfusa", R8A779G0_CLK_CBFUSA, CLK_EXTAL, 2, 1), DEF_FIXED("cpex", R8A779G0_CLK_CPEX, CLK_EXTAL, 2, 1), + DEF_FIXED("cp", R8A779G0_CLK_CP, CLK_EXTAL, 2, 1), DEF_FIXED("viobus", R8A779G0_CLK_VIOBUS, CLK_VIO, 1, 1), DEF_FIXED("viobusd2", R8A779G0_CLK_VIOBUSD2, CLK_VIO, 2, 1), DEF_FIXED("vcbus", R8A779G0_CLK_VCBUS, CLK_VC, 1, 1), @@ -173,10 +174,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("cmt1", 911, R8A779G0_CLK_R), DEF_MOD("cmt2", 912, R8A779G0_CLK_R), DEF_MOD("cmt3", 913, R8A779G0_CLK_R), - DEF_MOD("pfc0", 915, R8A779G0_CLK_CL16M), - DEF_MOD("pfc1", 916, R8A779G0_CLK_CL16M), - DEF_MOD("pfc2", 917, R8A779G0_CLK_CL16M), - DEF_MOD("pfc3", 918, R8A779G0_CLK_CL16M), + DEF_MOD("pfc0", 915, R8A779G0_CLK_CP), + DEF_MOD("pfc1", 916, R8A779G0_CLK_CP), + DEF_MOD("pfc2", 917, R8A779G0_CLK_CP), + DEF_MOD("pfc3", 918, R8A779G0_CLK_CP), DEF_MOD("tsc", 919, R8A779G0_CLK_CL16M), DEF_MOD("ssiu", 2926, R8A779G0_CLK_S0D6_PER), DEF_MOD("ssi", 2927, R8A779G0_CLK_S0D6_PER), diff --git a/include/dt-bindings/clock/r8a779g0-cpg-mssr.h b/include/dt-bindings/clock/r8a779g0-cpg-mssr.h index 754c54a6eb06a..7850cdc62e285 100644 --- a/include/dt-bindings/clock/r8a779g0-cpg-mssr.h +++ b/include/dt-bindings/clock/r8a779g0-cpg-mssr.h @@ -86,5 +86,6 @@ #define R8A779G0_CLK_CPEX 74 #define R8A779G0_CLK_CBFUSA 75 #define R8A779G0_CLK_R 76 +#define R8A779G0_CLK_CP 77 #endif /* __DT_BINDINGS_CLOCK_R8A779G0_CPG_MSSR_H__ */ -- GitLab From 5342ad8db53c93d07ff0f4ec9120b0809f4f0442 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Jan 2024 16:45:13 +0100 Subject: [PATCH 1021/2290] clk: renesas: r8a779f0: Correct PFC/GPIO parent clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1b32a83a02d9433dbd8c5f4d6fc44aa597755bd ] According to the R-Car S4 Series Hardware User’s Manual Rev.0.81, the parent clock of the Pin Function (PFC/GPIO) module clock is the CP clock. As this clock is not documented to exist on R-Car S4, use the CPEX clock instead. Fixes: 73421f2a48e6bd1d ("clk: renesas: r8a779f0: Add PFC clock") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/f88ec4aede0eaf0107c8bb7b28ba719ac6cd418f.1706197415.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a779f0-cpg-mssr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/r8a779f0-cpg-mssr.c b/drivers/clk/renesas/r8a779f0-cpg-mssr.c index 27b668def357f..7a49b91c93710 100644 --- a/drivers/clk/renesas/r8a779f0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779f0-cpg-mssr.c @@ -159,7 +159,7 @@ static const struct mssr_mod_clk r8a779f0_mod_clks[] __initconst = { DEF_MOD("cmt1", 911, R8A779F0_CLK_R), DEF_MOD("cmt2", 912, R8A779F0_CLK_R), DEF_MOD("cmt3", 913, R8A779F0_CLK_R), - DEF_MOD("pfc0", 915, R8A779F0_CLK_CL16M), + DEF_MOD("pfc0", 915, R8A779F0_CLK_CPEX), DEF_MOD("tsc", 919, R8A779F0_CLK_CL16M), DEF_MOD("ufs", 1514, R8A779F0_CLK_S0D4_HSC), }; -- GitLab From b14524556fe33f7580fd870c89cb23e4939381a0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Feb 2024 14:53:43 +0100 Subject: [PATCH 1022/2290] ALSA: seq: fix function cast warnings [ Upstream commit d7bf73809849463f76de42aad62c850305dd6c5d ] clang-16 points out a control flow integrity (kcfi) issue when event callbacks get converted to incompatible types: sound/core/seq/seq_midi.c:135:30: error: cast from 'int (*)(struct snd_rawmidi_substream *, const char *, int)' to 'snd_seq_dump_func_t' (aka 'int (*)(void *, void *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 135 | snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)dump_midi, substream); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sound/core/seq/seq_virmidi.c:83:31: error: cast from 'int (*)(struct snd_rawmidi_substream *, const unsigned char *, int)' to 'snd_seq_dump_func_t' (aka 'int (*)(void *, void *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 83 | snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For addressing those errors, introduce wrapper functions that are used for callbacks and bridge to the actual function call with pointer cast. The code was originally added with the initial ALSA merge in linux-2.5.4. [ the patch description shamelessly copied from Arnd's original patch -- tiwai ] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213101020.459183-1-arnd@kernel.org Link: https://lore.kernel.org/r/20240213135343.16411-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_midi.c | 8 +++++++- sound/core/seq/seq_virmidi.c | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_midi.c b/sound/core/seq/seq_midi.c index 4589aac091542..b00bbf18a6f5d 100644 --- a/sound/core/seq/seq_midi.c +++ b/sound/core/seq/seq_midi.c @@ -112,6 +112,12 @@ static int dump_midi(struct snd_rawmidi_substream *substream, const char *buf, i return 0; } +/* callback for snd_seq_dump_var_event(), bridging to dump_midi() */ +static int __dump_midi(void *ptr, void *buf, int count) +{ + return dump_midi(ptr, buf, count); +} + static int event_process_midi(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { @@ -131,7 +137,7 @@ static int event_process_midi(struct snd_seq_event *ev, int direct, pr_debug("ALSA: seq_midi: invalid sysex event flags = 0x%x\n", ev->flags); return 0; } - snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)dump_midi, substream); + snd_seq_dump_var_event(ev, __dump_midi, substream); snd_midi_event_reset_decode(msynth->parser); } else { if (msynth->parser == NULL) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index f5cae49500c81..ffd8e7202c334 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -62,6 +62,13 @@ static void snd_virmidi_init_event(struct snd_virmidi *vmidi, /* * decode input event and put to read buffer of each opened file */ + +/* callback for snd_seq_dump_var_event(), bridging to snd_rawmidi_receive() */ +static int dump_to_rawmidi(void *ptr, void *buf, int count) +{ + return snd_rawmidi_receive(ptr, buf, count); +} + static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, struct snd_seq_event *ev, bool atomic) @@ -80,7 +87,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; - snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_seq_dump_var_event(ev, dump_to_rawmidi, vmidi->substream); snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); -- GitLab From 511a6a88b24af64786c93bb902bfb924a1c01ef1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 9 Feb 2024 12:49:46 -0800 Subject: [PATCH 1023/2290] perf stat: Avoid metric-only segv [ Upstream commit 2543947c77e0e224bda86b4e7220c2f6714da463 ] Cycles is recognized as part of a hard coded metric in stat-shadow.c, it may call print_metric_only with a NULL fmt string leading to a segfault. Handle the NULL fmt explicitly. Fixes: 088519f318be ("perf stat: Move the display functions to stat-display.c") Signed-off-by: Ian Rogers Reviewed-by: Kan Liang Cc: K Prateek Nayak Cc: James Clark Cc: Kaige Ye Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240209204947.3873294-4-irogers@google.com Signed-off-by: Sasha Levin --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bc866d18973e4..ef9a3df459657 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -366,7 +366,7 @@ static void print_metric_only(struct perf_stat_config *config, if (color) mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; - color_snprintf(str, sizeof(str), color ?: "", fmt, val); + color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val); fprintf(out, "%*s ", mlen, str); } -- GitLab From fe636b9a6ede6ebf705b9db6b7590c8d642a48e1 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 13 Feb 2024 22:58:03 +0100 Subject: [PATCH 1024/2290] ASoC: meson: aiu: fix function pointer type mismatch [ Upstream commit 98ac85a00f31d2e9d5452b825a9ed0153d934043 ] clang-16 warns about casting functions to incompatible types, as is done here to call clk_disable_unprepare: sound/soc/meson/aiu.c:243:12: error: cast from 'void (*)(struct clk *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 243 | (void(*)(void *))clk_disable_unprepare, The pattern of getting, enabling and setting a disable callback for a clock can be replaced with devm_clk_get_enabled(), which also fixes this warning. Fixes: 6ae9ca9ce986 ("ASoC: meson: aiu: add i2s and spdif support") Reported-by: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Justin Stitt Link: https://msgid.link/r/20240213215807.3326688-2-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/aiu.c | 19 ++++--------------- sound/soc/meson/aiu.h | 1 - 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/sound/soc/meson/aiu.c b/sound/soc/meson/aiu.c index 88e611e64d14f..077b9c0b6c4ca 100644 --- a/sound/soc/meson/aiu.c +++ b/sound/soc/meson/aiu.c @@ -218,11 +218,12 @@ static const char * const aiu_spdif_ids[] = { static int aiu_clk_get(struct device *dev) { struct aiu *aiu = dev_get_drvdata(dev); + struct clk *pclk; int ret; - aiu->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(aiu->pclk)) - return dev_err_probe(dev, PTR_ERR(aiu->pclk), "Can't get the aiu pclk\n"); + pclk = devm_clk_get_enabled(dev, "pclk"); + if (IS_ERR(pclk)) + return dev_err_probe(dev, PTR_ERR(pclk), "Can't get the aiu pclk\n"); aiu->spdif_mclk = devm_clk_get(dev, "spdif_mclk"); if (IS_ERR(aiu->spdif_mclk)) @@ -239,18 +240,6 @@ static int aiu_clk_get(struct device *dev) if (ret) return dev_err_probe(dev, ret, "Can't get the spdif clocks\n"); - ret = clk_prepare_enable(aiu->pclk); - if (ret) { - dev_err(dev, "peripheral clock enable failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - aiu->pclk); - if (ret) - dev_err(dev, "failed to add reset action on pclk"); - return ret; } diff --git a/sound/soc/meson/aiu.h b/sound/soc/meson/aiu.h index 393b6c2307e49..0f94c8bf60818 100644 --- a/sound/soc/meson/aiu.h +++ b/sound/soc/meson/aiu.h @@ -33,7 +33,6 @@ struct aiu_platform_data { }; struct aiu { - struct clk *pclk; struct clk *spdif_mclk; struct aiu_interface i2s; struct aiu_interface spdif; -- GitLab From 4a00001d22d1f5722d519da41fdddef6d3afdf99 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 13 Feb 2024 22:58:04 +0100 Subject: [PATCH 1025/2290] ASoC: meson: t9015: fix function pointer type mismatch [ Upstream commit 5ad992c71b6a8e8a547954addc7af9fbde6ca10a ] clang-16 warns about casting functions to incompatible types, as is done here to call clk_disable_unprepare: sound/soc/meson/t9015.c:274:4: error: cast from 'void (*)(struct clk *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 274 | (void(*)(void *))clk_disable_unprepare, The pattern of getting, enabling and setting a disable callback for a clock can be replaced with devm_clk_get_enabled(), which also fixes this warning. Fixes: 33901f5b9b16 ("ASoC: meson: add t9015 internal DAC driver") Reported-by: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Justin Stitt Link: https://msgid.link/r/20240213215807.3326688-3-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/t9015.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/sound/soc/meson/t9015.c b/sound/soc/meson/t9015.c index 9c6b4dac68932..571f65788c592 100644 --- a/sound/soc/meson/t9015.c +++ b/sound/soc/meson/t9015.c @@ -48,7 +48,6 @@ #define POWER_CFG 0x10 struct t9015 { - struct clk *pclk; struct regulator *avdd; }; @@ -249,6 +248,7 @@ static int t9015_probe(struct platform_device *pdev) struct t9015 *priv; void __iomem *regs; struct regmap *regmap; + struct clk *pclk; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -256,26 +256,14 @@ static int t9015_probe(struct platform_device *pdev) return -ENOMEM; platform_set_drvdata(pdev, priv); - priv->pclk = devm_clk_get(dev, "pclk"); - if (IS_ERR(priv->pclk)) - return dev_err_probe(dev, PTR_ERR(priv->pclk), "failed to get core clock\n"); + pclk = devm_clk_get_enabled(dev, "pclk"); + if (IS_ERR(pclk)) + return dev_err_probe(dev, PTR_ERR(pclk), "failed to get core clock\n"); priv->avdd = devm_regulator_get(dev, "AVDD"); if (IS_ERR(priv->avdd)) return dev_err_probe(dev, PTR_ERR(priv->avdd), "failed to AVDD\n"); - ret = clk_prepare_enable(priv->pclk); - if (ret) { - dev_err(dev, "core clock enable failed\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - priv->pclk); - if (ret) - return ret; - ret = device_reset(dev); if (ret) { dev_err(dev, "reset failed\n"); -- GitLab From b494caad9c6b19014da6aa6e53a0239bda25faad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Feb 2024 14:58:37 +0100 Subject: [PATCH 1026/2290] powerpc: Force inlining of arch_vmap_p{u/m}d_supported() [ Upstream commit c5aebb53b32460bc52680dd4e2a2f6b84d5ea521 ] arch_vmap_pud_supported() and arch_vmap_pmd_supported() are expected to constant-fold to false when RADIX is not enabled. Force inlining in order to avoid following failure which leads to unexpected call of non-existing pud_set_huge() and pmd_set_huge() on powerpc 8xx. In function 'pud_huge_tests', inlined from 'debug_vm_pgtable' at mm/debug_vm_pgtable.c:1399:2: ./arch/powerpc/include/asm/vmalloc.h:9:33: warning: inlining failed in call to 'arch_vmap_pud_supported.isra': call is unlikely and code size would grow [-Winline] 9 | #define arch_vmap_pud_supported arch_vmap_pud_supported | ^~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/vmalloc.h:10:20: note: in expansion of macro 'arch_vmap_pud_supported' 10 | static inline bool arch_vmap_pud_supported(pgprot_t prot) | ^~~~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/vmalloc.h:9:33: note: called from here 9 | #define arch_vmap_pud_supported arch_vmap_pud_supported mm/debug_vm_pgtable.c:458:14: note: in expansion of macro 'arch_vmap_pud_supported' 458 | if (!arch_vmap_pud_supported(args->page_prot) || | ^~~~~~~~~~~~~~~~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402131836.OU1TDuoi-lkp@intel.com/ Fixes: 8309c9d71702 ("powerpc: inline huge vmap supported functions") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/bbd84ad52bf377e8d3b5865a906f2dc5d99964ba.1707832677.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/vmalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h index 4c69ece52a31e..59ed89890c902 100644 --- a/arch/powerpc/include/asm/vmalloc.h +++ b/arch/powerpc/include/asm/vmalloc.h @@ -7,14 +7,14 @@ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #define arch_vmap_pud_supported arch_vmap_pud_supported -static inline bool arch_vmap_pud_supported(pgprot_t prot) +static __always_inline bool arch_vmap_pud_supported(pgprot_t prot) { /* HPT does not cope with large pages in the vmalloc area */ return radix_enabled(); } #define arch_vmap_pmd_supported arch_vmap_pmd_supported -static inline bool arch_vmap_pmd_supported(pgprot_t prot) +static __always_inline bool arch_vmap_pmd_supported(pgprot_t prot) { return radix_enabled(); } -- GitLab From 9a819504171d4500876d81b17237c6980e6f9334 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 20 Oct 2022 15:12:21 +0300 Subject: [PATCH 1027/2290] ASoC: SOF: Introduce container struct for SOF firmware [ Upstream commit 4f373ccf226e37a20fdc15a3df8034517a6045fd ] Move the firmware related information under a new struct (sof_firmware) and add it to the high level snd_sof_dev struct. Convert the generic code to use this new container when working with the basefw and for compatibility reasons set the old plat_data members used by the platforms. Signed-off-by: Peter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Chao Song Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20221020121238.18339-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Stable-dep-of: 98f681b0f84c ("ASoC: SOF: Add some bounds checking to firmware data") Signed-off-by: Sasha Levin --- sound/soc/sof/ipc3-loader.c | 26 ++++++++++++-------------- sound/soc/sof/ipc4-loader.c | 6 ++---- sound/soc/sof/loader.c | 18 +++++++++++++----- sound/soc/sof/sof-priv.h | 14 ++++++++++++++ 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/sound/soc/sof/ipc3-loader.c b/sound/soc/sof/ipc3-loader.c index bf423ca4e97bb..28218766d2114 100644 --- a/sound/soc/sof/ipc3-loader.c +++ b/sound/soc/sof/ipc3-loader.c @@ -138,8 +138,7 @@ static ssize_t ipc3_fw_ext_man_size(struct snd_sof_dev *sdev, const struct firmw static size_t sof_ipc3_fw_parse_ext_man(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; const struct sof_ext_man_elem_header *elem_hdr; const struct sof_ext_man_header *head; ssize_t ext_man_size; @@ -310,18 +309,18 @@ static int sof_ipc3_parse_module_memcpy(struct snd_sof_dev *sdev, static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + u32 payload_offset = sdev->basefw.payload_offset; + const struct firmware *fw = sdev->basefw.fw; struct snd_sof_fw_header *header; struct snd_sof_mod_hdr *module; int (*load_module)(struct snd_sof_dev *sof_dev, struct snd_sof_mod_hdr *hdr); size_t remaining; int ret, count; - if (!plat_data->fw) + if (!fw) return -EINVAL; - header = (struct snd_sof_fw_header *)(fw->data + plat_data->fw_offset); + header = (struct snd_sof_fw_header *)(fw->data + payload_offset); load_module = sof_ops(sdev)->load_module; if (!load_module) { dev_dbg(sdev->dev, "Using generic module loading\n"); @@ -331,9 +330,8 @@ static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) } /* parse each module */ - module = (struct snd_sof_mod_hdr *)(fw->data + plat_data->fw_offset + - sizeof(*header)); - remaining = fw->size - sizeof(*header) - plat_data->fw_offset; + module = (struct snd_sof_mod_hdr *)(fw->data + payload_offset + sizeof(*header)); + remaining = fw->size - sizeof(*header) - payload_offset; /* check for wrap */ if (remaining > fw->size) { dev_err(sdev->dev, "%s: fw size smaller than header size\n", __func__); @@ -374,19 +372,19 @@ static int sof_ipc3_load_fw_to_dsp(struct snd_sof_dev *sdev) static int sof_ipc3_validate_firmware(struct snd_sof_dev *sdev) { - struct snd_sof_pdata *plat_data = sdev->pdata; - const struct firmware *fw = plat_data->fw; + u32 payload_offset = sdev->basefw.payload_offset; + const struct firmware *fw = sdev->basefw.fw; struct snd_sof_fw_header *header; - size_t fw_size = fw->size - plat_data->fw_offset; + size_t fw_size = fw->size - payload_offset; - if (fw->size <= plat_data->fw_offset) { + if (fw->size <= payload_offset) { dev_err(sdev->dev, "firmware size must be greater than firmware offset\n"); return -EINVAL; } /* Read the header information from the data pointer */ - header = (struct snd_sof_fw_header *)(fw->data + plat_data->fw_offset); + header = (struct snd_sof_fw_header *)(fw->data + payload_offset); /* verify FW sig */ if (strncmp(header->sig, SND_SOF_FW_SIG, SND_SOF_FW_SIG_SIZE) != 0) { diff --git a/sound/soc/sof/ipc4-loader.c b/sound/soc/sof/ipc4-loader.c index e635ae515fa9f..9f433e9b4cd37 100644 --- a/sound/soc/sof/ipc4-loader.c +++ b/sound/soc/sof/ipc4-loader.c @@ -17,9 +17,8 @@ static size_t sof_ipc4_fw_parse_ext_man(struct snd_sof_dev *sdev) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; - struct snd_sof_pdata *plat_data = sdev->pdata; struct sof_man4_fw_binary_header *fw_header; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; struct sof_ext_manifest4_hdr *ext_man_hdr; struct sof_man4_module_config *fm_config; struct sof_ipc4_fw_module *fw_module; @@ -138,9 +137,8 @@ static int sof_ipc4_validate_firmware(struct snd_sof_dev *sdev) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; u32 fw_hdr_offset = ipc4_data->manifest_fw_hdr_offset; - struct snd_sof_pdata *plat_data = sdev->pdata; struct sof_man4_fw_binary_header *fw_header; - const struct firmware *fw = plat_data->fw; + const struct firmware *fw = sdev->basefw.fw; struct sof_ext_manifest4_hdr *ext_man_hdr; ext_man_hdr = (struct sof_ext_manifest4_hdr *)fw->data; diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index 5f51d936b3067..ba8e3aae0a5cb 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -22,7 +22,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) int ret; /* Don't request firmware again if firmware is already requested */ - if (plat_data->fw) + if (sdev->basefw.fw) return 0; fw_filename = kasprintf(GFP_KERNEL, "%s/%s", @@ -31,7 +31,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) if (!fw_filename) return -ENOMEM; - ret = request_firmware(&plat_data->fw, fw_filename, sdev->dev); + ret = request_firmware(&sdev->basefw.fw, fw_filename, sdev->dev); if (ret < 0) { dev_err(sdev->dev, @@ -48,7 +48,7 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) ext_man_size = sdev->ipc->ops->fw_loader->parse_ext_manifest(sdev); if (ext_man_size > 0) { /* when no error occurred, drop extended manifest */ - plat_data->fw_offset = ext_man_size; + sdev->basefw.payload_offset = ext_man_size; } else if (!ext_man_size) { /* No extended manifest, so nothing to skip during FW load */ dev_dbg(sdev->dev, "firmware doesn't contain extended manifest\n"); @@ -58,6 +58,12 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev) fw_filename, ret); } + /* + * Until the platform code is switched to use the new container the fw + * and payload offset must be set in plat_data + */ + plat_data->fw = sdev->basefw.fw; + plat_data->fw_offset = sdev->basefw.payload_offset; err: kfree(fw_filename); @@ -100,7 +106,8 @@ int snd_sof_load_firmware_memcpy(struct snd_sof_dev *sdev) return 0; error: - release_firmware(plat_data->fw); + release_firmware(sdev->basefw.fw); + sdev->basefw.fw = NULL; plat_data->fw = NULL; return ret; @@ -185,7 +192,8 @@ EXPORT_SYMBOL(snd_sof_run_firmware); void snd_sof_fw_unload(struct snd_sof_dev *sdev) { /* TODO: support module unloading at runtime */ - release_firmware(sdev->pdata->fw); + release_firmware(sdev->basefw.fw); + sdev->basefw.fw = NULL; sdev->pdata->fw = NULL; } EXPORT_SYMBOL(snd_sof_fw_unload); diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h index de08825915b35..3d70b57e4864d 100644 --- a/sound/soc/sof/sof-priv.h +++ b/sound/soc/sof/sof-priv.h @@ -136,6 +136,17 @@ struct snd_sof_platform_stream_params { bool cont_update_posn; }; +/** + * struct sof_firmware - Container struct for SOF firmware + * @fw: Pointer to the firmware + * @payload_offset: Offset of the data within the loaded firmware image to be + * loaded to the DSP (skipping for example ext_manifest section) + */ +struct sof_firmware { + const struct firmware *fw; + u32 payload_offset; +}; + /* * SOF DSP HW abstraction operations. * Used to abstract DSP HW architecture and any IO busses between host CPU @@ -487,6 +498,9 @@ struct snd_sof_dev { spinlock_t ipc_lock; /* lock for IPC users */ spinlock_t hw_lock; /* lock for HW IO access */ + /* Main, Base firmware image */ + struct sof_firmware basefw; + /* * ASoC components. plat_drv fields are set dynamically so * can't use const -- GitLab From d133d67e7e724102d1e53009c4f88afaaf3e167c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Feb 2024 16:02:16 +0300 Subject: [PATCH 1028/2290] ASoC: SOF: Add some bounds checking to firmware data [ Upstream commit 98f681b0f84cfc3a1d83287b77697679e0398306 ] Smatch complains about "head->full_size - head->header_size" can underflow. To some extent, we're always going to have to trust the firmware a bit. However, it's easy enough to add a check for negatives, and let's add a upper bounds check as well. Fixes: d2458baa799f ("ASoC: SOF: ipc3-loader: Implement firmware parsing and loading") Signed-off-by: Dan Carpenter Link: https://msgid.link/r/5593d147-058c-4de3-a6f5-540ecb96f6f8@moroto.mountain Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/ipc3-loader.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/ipc3-loader.c b/sound/soc/sof/ipc3-loader.c index 28218766d2114..6e3ef06721106 100644 --- a/sound/soc/sof/ipc3-loader.c +++ b/sound/soc/sof/ipc3-loader.c @@ -148,6 +148,8 @@ static size_t sof_ipc3_fw_parse_ext_man(struct snd_sof_dev *sdev) head = (struct sof_ext_man_header *)fw->data; remaining = head->full_size - head->header_size; + if (remaining < 0 || remaining > sdev->basefw.fw->size) + return -EINVAL; ext_man_size = ipc3_fw_ext_man_size(sdev, fw); /* Assert firmware starts with extended manifest */ -- GitLab From 298f7f137021e2d38affc5d163fa64001c199760 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Wed, 9 Nov 2022 17:28:52 +0800 Subject: [PATCH 1029/2290] NTB: EPF: fix possible memory leak in pci_vntb_probe() [ Upstream commit 956578e3d397e00d6254dc7b5194d28587f98518 ] As ntb_register_device() don't handle error of device_register(), if ntb_register_device() returns error in pci_vntb_probe(), name of kobject which is allocated in dev_set_name() called in device_add() is leaked. As comment of device_add() says, it should call put_device() to drop the reference count that was set in device_initialize() when it fails, so the name can be freed in kobject_cleanup(). Signed-off-by: ruanjinjie Signed-off-by: Jon Mason Stable-dep-of: aebfdfe39b93 ("NTB: fix possible name leak in ntb_register_device()") Signed-off-by: Sasha Levin --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 8c6931210ac4d..cd985a41c8d65 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -1288,6 +1288,7 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_register_dev: + put_device(&ndev->ntb.dev); return -EINVAL; } -- GitLab From 6632a54ac8057cc0b0d789c6f73883e871bcd25c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 1 Dec 2023 11:30:56 +0800 Subject: [PATCH 1030/2290] NTB: fix possible name leak in ntb_register_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aebfdfe39b9327a3077d0df8db3beb3160c9bdd0 ] If device_register() fails in ntb_register_device(), the device name allocated by dev_set_name() should be freed. As per the comment in device_register(), callers should use put_device() to give up the reference in the error path. So fix this by calling put_device() in the error path so that the name can be freed in kobject_cleanup(). As a result of this, put_device() in the error path of ntb_register_device() is removed and the actual error is returned. Fixes: a1bd3baeb2f1 ("NTB: Add NTB hardware abstraction layer") Signed-off-by: Yang Yingliang Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231201033057.1399131-1-yangyingliang@huaweicloud.com [mani: reworded commit message] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- drivers/ntb/core.c | 8 +++++++- drivers/pci/endpoint/functions/pci-epf-vntb.c | 6 +----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/core.c b/drivers/ntb/core.c index 27dd93deff6e5..d702bee780826 100644 --- a/drivers/ntb/core.c +++ b/drivers/ntb/core.c @@ -100,6 +100,8 @@ EXPORT_SYMBOL(ntb_unregister_client); int ntb_register_device(struct ntb_dev *ntb) { + int ret; + if (!ntb) return -EINVAL; if (!ntb->pdev) @@ -120,7 +122,11 @@ int ntb_register_device(struct ntb_dev *ntb) ntb->ctx_ops = NULL; spin_lock_init(&ntb->ctx_lock); - return device_register(&ntb->dev); + ret = device_register(&ntb->dev); + if (ret) + put_device(&ntb->dev); + + return ret; } EXPORT_SYMBOL(ntb_register_device); diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index cd985a41c8d65..b4c1a4f6029d4 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -1281,15 +1281,11 @@ static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = ntb_register_device(&ndev->ntb); if (ret) { dev_err(dev, "Failed to register NTB device\n"); - goto err_register_dev; + return ret; } dev_dbg(dev, "PCI Virtual NTB driver loaded\n"); return 0; - -err_register_dev: - put_device(&ndev->ntb.dev); - return -EINVAL; } static struct pci_device_id pci_vntb_table[] = { -- GitLab From 8006813ebdf123583f35db41abcfacfc0c671844 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 19 Oct 2022 19:45:50 +0200 Subject: [PATCH 1031/2290] media: cedrus: h265: Associate mv col buffers with buffer [ Upstream commit 0ee952c2f484ee0059f7ce4951aaa3cb0eda96dd ] Currently mv col aux buffers are allocated as a pool. This is not optimal because pool size equals number of buffers before stream is started. Buffers can easily be allocated afterwards. In such cases, invalid pointer is assigned to the decoding frame and Cedrus might overwrite memory location which is allocated to different task. Solve this issue with allocating mv col buffer once capture buffer is actually used. Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Stable-dep-of: 3a11887f7f11 ("media: cedrus: h265: Fix configuring bitstream size") Signed-off-by: Sasha Levin --- drivers/staging/media/sunxi/cedrus/cedrus.h | 9 +-- .../staging/media/sunxi/cedrus/cedrus_h265.c | 63 ++++++++++--------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h index 93a2196006f73..cb99610f3e128 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h @@ -109,6 +109,11 @@ struct cedrus_buffer { unsigned int position; enum cedrus_h264_pic_type pic_type; } h264; + struct { + void *mv_col_buf; + dma_addr_t mv_col_buf_dma; + ssize_t mv_col_buf_size; + } h265; } codec; }; @@ -142,10 +147,6 @@ struct cedrus_ctx { ssize_t intra_pred_buf_size; } h264; struct { - void *mv_col_buf; - dma_addr_t mv_col_buf_addr; - ssize_t mv_col_buf_size; - ssize_t mv_col_buf_unit_size; void *neighbor_info_buf; dma_addr_t neighbor_info_buf_addr; void *entry_points_buf; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 625f77a8c5bde..7a438cd22c341 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -90,12 +90,13 @@ static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data, } static inline dma_addr_t -cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx, - unsigned int index, unsigned int field) +cedrus_h265_frame_info_mv_col_buf_addr(struct vb2_buffer *buf, + unsigned int field) { - return ctx->codec.h265.mv_col_buf_addr + index * - ctx->codec.h265.mv_col_buf_unit_size + - field * ctx->codec.h265.mv_col_buf_unit_size / 2; + struct cedrus_buffer *cedrus_buf = vb2_to_cedrus_buffer(buf); + + return cedrus_buf->codec.h265.mv_col_buf_dma + + field * cedrus_buf->codec.h265.mv_col_buf_size / 2; } static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, @@ -108,9 +109,8 @@ static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buf, 0); dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buf, 1); dma_addr_t mv_col_buf_addr[2] = { - cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, 0), - cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, - field_pic ? 1 : 0) + cedrus_h265_frame_info_mv_col_buf_addr(buf, 0), + cedrus_h265_frame_info_mv_col_buf_addr(buf, field_pic ? 1 : 0) }; u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO + VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index; @@ -412,6 +412,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) unsigned int width_in_ctb_luma, ctb_size_luma; unsigned int log2_max_luma_coding_block_size; unsigned int ctb_addr_x, ctb_addr_y; + struct cedrus_buffer *cedrus_buf; dma_addr_t src_buf_addr; dma_addr_t src_buf_end_addr; u32 chroma_log2_weight_denom; @@ -428,6 +429,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) decode_params = run->h265.decode_params; pred_weight_table = &slice_params->pred_weight_table; num_entry_point_offsets = slice_params->num_entry_point_offsets; + cedrus_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); /* * If entry points offsets are present, we should get them @@ -445,31 +447,25 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma); /* MV column buffer size and allocation. */ - if (!ctx->codec.h265.mv_col_buf_size) { - unsigned int num_buffers = - run->dst->vb2_buf.vb2_queue->num_buffers; - + if (!cedrus_buf->codec.h265.mv_col_buf_size) { /* * Each CTB requires a MV col buffer with a specific unit size. * Since the address is given with missing lsb bits, 1 KiB is * added to each buffer to ensure proper alignment. */ - ctx->codec.h265.mv_col_buf_unit_size = + cedrus_buf->codec.h265.mv_col_buf_size = DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) * DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) * CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K; - ctx->codec.h265.mv_col_buf_size = num_buffers * - ctx->codec.h265.mv_col_buf_unit_size; - /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ - ctx->codec.h265.mv_col_buf = + cedrus_buf->codec.h265.mv_col_buf = dma_alloc_attrs(dev->dev, - ctx->codec.h265.mv_col_buf_size, - &ctx->codec.h265.mv_col_buf_addr, + cedrus_buf->codec.h265.mv_col_buf_size, + &cedrus_buf->codec.h265.mv_col_buf_dma, GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); - if (!ctx->codec.h265.mv_col_buf) { - ctx->codec.h265.mv_col_buf_size = 0; + if (!cedrus_buf->codec.h265.mv_col_buf) { + cedrus_buf->codec.h265.mv_col_buf_size = 0; return -ENOMEM; } } @@ -816,9 +812,6 @@ static int cedrus_h265_start(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; - /* The buffer size is calculated at setup time. */ - ctx->codec.h265.mv_col_buf_size = 0; - /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ ctx->codec.h265.neighbor_info_buf = dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, @@ -845,14 +838,24 @@ static int cedrus_h265_start(struct cedrus_ctx *ctx) static void cedrus_h265_stop(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; + struct cedrus_buffer *buf; + struct vb2_queue *vq; + unsigned int i; - if (ctx->codec.h265.mv_col_buf_size > 0) { - dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size, - ctx->codec.h265.mv_col_buf, - ctx->codec.h265.mv_col_buf_addr, - DMA_ATTR_NO_KERNEL_MAPPING); + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + for (i = 0; i < vq->num_buffers; i++) { + buf = vb2_to_cedrus_buffer(vb2_get_buffer(vq, i)); - ctx->codec.h265.mv_col_buf_size = 0; + if (buf->codec.h265.mv_col_buf_size > 0) { + dma_free_attrs(dev->dev, + buf->codec.h265.mv_col_buf_size, + buf->codec.h265.mv_col_buf, + buf->codec.h265.mv_col_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + + buf->codec.h265.mv_col_buf_size = 0; + } } dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, -- GitLab From acdf24f3d5232a86b81f7a14bd2998a78cb84843 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:09:25 +0100 Subject: [PATCH 1032/2290] media: cedrus: h265: Fix configuring bitstream size [ Upstream commit 3a11887f7f11a6bb1f05e7f67b3ea20dadfec443 ] bit_size field holds size of slice, not slice + header. Because of HW quirks, driver can't program in just slice, but also preceding header. But that means that currently used bit_size is wrong (too small). Instead, just use size of whole buffer. There is no harm in doing this. Fixes: 86caab29da78 ("media: cedrus: Add HEVC/H.265 decoding support") Suggested-by: Paul Kocialkowski Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 7a438cd22c341..9f13c942a806b 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -414,11 +414,11 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) unsigned int ctb_addr_x, ctb_addr_y; struct cedrus_buffer *cedrus_buf; dma_addr_t src_buf_addr; - dma_addr_t src_buf_end_addr; u32 chroma_log2_weight_denom; u32 num_entry_point_offsets; u32 output_pic_list_index; u32 pic_order_cnt[2]; + size_t slice_bytes; u8 padding; int count; u32 reg; @@ -430,6 +430,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) pred_weight_table = &slice_params->pred_weight_table; num_entry_point_offsets = slice_params->num_entry_point_offsets; cedrus_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); + slice_bytes = vb2_get_plane_payload(&run->src->vb2_buf, 0); /* * If entry points offsets are present, we should get them @@ -477,7 +478,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0); - reg = slice_params->bit_size; + reg = slice_bytes * 8; cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg); /* Source beginning and end addresses. */ @@ -491,10 +492,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg); - src_buf_end_addr = src_buf_addr + - DIV_ROUND_UP(slice_params->bit_size, 8); - - reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr); + reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_addr + slice_bytes); cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg); /* Coding tree block address */ -- GitLab From 0dfdf4c1ac4cfbbbeb5be2cd8b2148f7ff8e3817 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:34:20 +0100 Subject: [PATCH 1033/2290] media: sun8i-di: Fix coefficient writes [ Upstream commit 794b581f8c6eb7b60fe468ccb96dd3cd38ff779f ] Currently coefficients are applied only once, since they don't change. However, this is done before enable bit is set and thus it doesn't get applied properly. Fix that by applying coefficients after enable bit is set. While this means that it will be done evey time, it doesn't bring much time penalty. Fixes: a4260ea49547 ("media: sun4i: Add H3 deinterlace driver") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../media/platform/sunxi/sun8i-di/sun8i-di.c | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index aa65d70b6270a..3e58de58cd89d 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -66,6 +66,7 @@ static void deinterlace_device_run(void *priv) struct vb2_v4l2_buffer *src, *dst; unsigned int hstep, vstep; dma_addr_t addr; + int i; src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); @@ -160,6 +161,26 @@ static void deinterlace_device_run(void *priv) deinterlace_write(dev, DEINTERLACE_CH1_HORZ_FACT, hstep); deinterlace_write(dev, DEINTERLACE_CH1_VERT_FACT, vstep); + /* neutral filter coefficients */ + deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL, + DEINTERLACE_FRM_CTRL_COEF_ACCESS); + readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val, + val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40); + + for (i = 0; i < 32; i++) { + deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4, + DEINTERLACE_IDENTITY_COEF); + deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4, + DEINTERLACE_IDENTITY_COEF); + } + + deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL, + DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0); + deinterlace_clr_set_bits(dev, DEINTERLACE_FIELD_CTRL, DEINTERLACE_FIELD_CTRL_FIELD_CNT_MSK, DEINTERLACE_FIELD_CTRL_FIELD_CNT(ctx->field)); @@ -248,7 +269,6 @@ static irqreturn_t deinterlace_irq(int irq, void *data) static void deinterlace_init(struct deinterlace_dev *dev) { u32 val; - int i; deinterlace_write(dev, DEINTERLACE_BYPASS, DEINTERLACE_BYPASS_CSC); @@ -285,26 +305,6 @@ static void deinterlace_init(struct deinterlace_dev *dev) deinterlace_clr_set_bits(dev, DEINTERLACE_CHROMA_DIFF, DEINTERLACE_CHROMA_DIFF_TH_MSK, DEINTERLACE_CHROMA_DIFF_TH(5)); - - /* neutral filter coefficients */ - deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL, - DEINTERLACE_FRM_CTRL_COEF_ACCESS); - readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val, - val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40); - - for (i = 0; i < 32; i++) { - deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4, - DEINTERLACE_IDENTITY_COEF); - deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4, - DEINTERLACE_IDENTITY_COEF); - } - - deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL, - DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0); } static inline struct deinterlace_ctx *deinterlace_file2ctx(struct file *file) -- GitLab From f4258e5882e299a20ef723007d5cb9ef29a41e3a Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:34:21 +0100 Subject: [PATCH 1034/2290] media: sun8i-di: Fix power on/off sequences [ Upstream commit cff104e33bad38f4b2c8d58816a7accfaa2879f9 ] According to user manual, reset line should be deasserted before clocks are enabled. Also fix power down sequence to be reverse of that. Fixes: a4260ea49547 ("media: sun4i: Add H3 deinterlace driver") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../media/platform/sunxi/sun8i-di/sun8i-di.c | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index 3e58de58cd89d..01b44dd708bd3 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -931,11 +931,18 @@ static int deinterlace_runtime_resume(struct device *device) return ret; } + ret = reset_control_deassert(dev->rstc); + if (ret) { + dev_err(dev->dev, "Failed to apply reset\n"); + + goto err_exclusive_rate; + } + ret = clk_prepare_enable(dev->bus_clk); if (ret) { dev_err(dev->dev, "Failed to enable bus clock\n"); - goto err_exclusive_rate; + goto err_rst; } ret = clk_prepare_enable(dev->mod_clk); @@ -952,23 +959,16 @@ static int deinterlace_runtime_resume(struct device *device) goto err_mod_clk; } - ret = reset_control_deassert(dev->rstc); - if (ret) { - dev_err(dev->dev, "Failed to apply reset\n"); - - goto err_ram_clk; - } - deinterlace_init(dev); return 0; -err_ram_clk: - clk_disable_unprepare(dev->ram_clk); err_mod_clk: clk_disable_unprepare(dev->mod_clk); err_bus_clk: clk_disable_unprepare(dev->bus_clk); +err_rst: + reset_control_assert(dev->rstc); err_exclusive_rate: clk_rate_exclusive_put(dev->mod_clk); @@ -979,11 +979,12 @@ static int deinterlace_runtime_suspend(struct device *device) { struct deinterlace_dev *dev = dev_get_drvdata(device); - reset_control_assert(dev->rstc); - clk_disable_unprepare(dev->ram_clk); clk_disable_unprepare(dev->mod_clk); clk_disable_unprepare(dev->bus_clk); + + reset_control_assert(dev->rstc); + clk_rate_exclusive_put(dev->mod_clk); return 0; -- GitLab From 77301ad7bebf00f60d01decc12c51d27321acbb8 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 16 Dec 2023 14:34:22 +0100 Subject: [PATCH 1035/2290] media: sun8i-di: Fix chroma difference threshold [ Upstream commit 856525e8db272b0ce6d9c6e6c2eeb97892b485a6 ] While there is no good explanation what this value does, vendor driver uses value 31 for it. Align driver with it. Fixes: a4260ea49547 ("media: sun4i: Add H3 deinterlace driver") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/sunxi/sun8i-di/sun8i-di.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c index 01b44dd708bd3..7a2f558c981db 100644 --- a/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c +++ b/drivers/media/platform/sunxi/sun8i-di/sun8i-di.c @@ -304,7 +304,7 @@ static void deinterlace_init(struct deinterlace_dev *dev) deinterlace_clr_set_bits(dev, DEINTERLACE_CHROMA_DIFF, DEINTERLACE_CHROMA_DIFF_TH_MSK, - DEINTERLACE_CHROMA_DIFF_TH(5)); + DEINTERLACE_CHROMA_DIFF_TH(31)); } static inline struct deinterlace_ctx *deinterlace_file2ctx(struct file *file) -- GitLab From 8df9a3c7044b847e9c4dc7e683fd64c6b873f328 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 31 Jan 2024 13:00:33 +0100 Subject: [PATCH 1036/2290] media: imx: csc/scaler: fix v4l2_ctrl_handler memory leak [ Upstream commit 4797a3dd46f220e6d83daf54d70c5b33db6deb01 ] Free the memory allocated in v4l2_ctrl_handler_init on release. Fixes: a8ef0488cc59 ("media: imx: add csc/scaler mem2mem device") Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/imx/imx-media-csc-scaler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c index 1fd39a2fca98a..95cca281e8a37 100644 --- a/drivers/staging/media/imx/imx-media-csc-scaler.c +++ b/drivers/staging/media/imx/imx-media-csc-scaler.c @@ -803,6 +803,7 @@ static int ipu_csc_scaler_release(struct file *file) dev_dbg(priv->dev, "Releasing instance %p\n", ctx); + v4l2_ctrl_handler_free(&ctx->ctrl_hdlr); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); -- GitLab From 8e6e28e650bac0eec2f18f022dd830a4b76daf53 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Sun, 11 Feb 2024 07:07:05 -0800 Subject: [PATCH 1037/2290] media: go7007: add check of return value of go7007_read_addr() [ Upstream commit 0b70530ee740861f4776ff724fcc25023df1799a ] If go7007_read_addr() returns error channel is not assigned a value. In this case go to allocfail. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 866b8695d67e ("Staging: add the go7007 video driver") Signed-off-by: Daniil Dulov Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/go7007/go7007-usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/go7007/go7007-usb.c b/drivers/media/usb/go7007/go7007-usb.c index eeb85981e02b6..762c13e49bfa5 100644 --- a/drivers/media/usb/go7007/go7007-usb.c +++ b/drivers/media/usb/go7007/go7007-usb.c @@ -1201,7 +1201,9 @@ static int go7007_usb_probe(struct usb_interface *intf, u16 channel; /* read channel number from GPIO[1:0] */ - go7007_read_addr(go, 0x3c81, &channel); + if (go7007_read_addr(go, 0x3c81, &channel)) + goto allocfail; + channel &= 0x3; go->board_id = GO7007_BOARDID_ADLINK_MPG24; usb->board = board = &board_adlink_mpg24; -- GitLab From 7dee677d17c9a38d5ae4853e19ee79eb9ed8a997 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Sun, 11 Feb 2024 07:07:25 -0800 Subject: [PATCH 1038/2290] media: pvrusb2: remove redundant NULL check [ Upstream commit 95ac1210fb2753f968ebce0730d4fbc553c2a3dc ] Pointer dip->stream cannot be NULL due to a shift, thus remove redundant NULL check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c74e0062684b ("V4L/DVB (5059): Pvrusb2: Be smarter about mode restoration") Signed-off-by: Daniil Dulov Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-v4l2.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index c04ab7258d645..d195bd6a2248b 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -1198,11 +1198,6 @@ static void pvr2_v4l2_dev_init(struct pvr2_v4l2_dev *dip, dip->minor_type = pvr2_v4l_type_video; nr_ptr = video_nr; caps |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_AUDIO; - if (!dip->stream) { - pr_err(KBUILD_MODNAME - ": Failed to set up pvrusb2 v4l video dev due to missing stream instance\n"); - return; - } break; case VFL_TYPE_VBI: dip->config = pvr2_config_vbi; -- GitLab From 6e93e0f8832223769e61ed4accd8247da5ed839e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:04:27 +0100 Subject: [PATCH 1039/2290] media: pvrusb2: fix pvr2_stream_callback casts [ Upstream commit 30baa4a96b23add91a87305baaeba82c4e109e1f ] clang-16 complains about a control flow integrity (KCFI) issue in pvrusb2, which casts three different prototypes into pvr2_stream_callback: drivers/media/usb/pvrusb2/pvrusb2-v4l2.c:1070:30: error: cast from 'void (*)(struct pvr2_v4l2_fh *)' to 'pvr2_stream_callback' (aka 'void (*)(void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1070 | pvr2_stream_set_callback(sp,(pvr2_stream_callback)pvr2_v4l2_notify,fh); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/usb/pvrusb2/pvrusb2-context.c:110:6: error: cast from 'void (*)(struct pvr2_context *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 110 | (void (*)(void *))pvr2_context_notify, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/usb/pvrusb2/pvrusb2-dvb.c:152:6: error: cast from 'void (*)(struct pvr2_dvb_adapter *)' to 'pvr2_stream_callback' (aka 'void (*)(void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 152 | (pvr2_stream_callback) pvr2_dvb_notify, adap); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change the functions to actually take a void* argument so the cast is no longer needed. Fixes: bb8ce9d9143c ("V4L/DVB (7682): pvrusb2-dvb: finish up stream & buffer handling") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-context.c | 8 ++++---- drivers/media/usb/pvrusb2/pvrusb2-dvb.c | 6 ++++-- drivers/media/usb/pvrusb2/pvrusb2-v4l2.c | 6 ++++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-context.c b/drivers/media/usb/pvrusb2/pvrusb2-context.c index 1764674de98bc..58f2f3ff10ee2 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-context.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-context.c @@ -90,8 +90,10 @@ static void pvr2_context_destroy(struct pvr2_context *mp) } -static void pvr2_context_notify(struct pvr2_context *mp) +static void pvr2_context_notify(void *ptr) { + struct pvr2_context *mp = ptr; + pvr2_context_set_notify(mp,!0); } @@ -106,9 +108,7 @@ static void pvr2_context_check(struct pvr2_context *mp) pvr2_trace(PVR2_TRACE_CTXT, "pvr2_context %p (initialize)", mp); /* Finish hardware initialization */ - if (pvr2_hdw_initialize(mp->hdw, - (void (*)(void *))pvr2_context_notify, - mp)) { + if (pvr2_hdw_initialize(mp->hdw, pvr2_context_notify, mp)) { mp->video_stream.stream = pvr2_hdw_get_video_stream(mp->hdw); /* Trigger interface initialization. By doing this diff --git a/drivers/media/usb/pvrusb2/pvrusb2-dvb.c b/drivers/media/usb/pvrusb2/pvrusb2-dvb.c index 26811efe0fb58..9a9bae21c6147 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-dvb.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-dvb.c @@ -88,8 +88,10 @@ static int pvr2_dvb_feed_thread(void *data) return stat; } -static void pvr2_dvb_notify(struct pvr2_dvb_adapter *adap) +static void pvr2_dvb_notify(void *ptr) { + struct pvr2_dvb_adapter *adap = ptr; + wake_up(&adap->buffer_wait_data); } @@ -149,7 +151,7 @@ static int pvr2_dvb_stream_do_start(struct pvr2_dvb_adapter *adap) } pvr2_stream_set_callback(pvr->video_stream.stream, - (pvr2_stream_callback) pvr2_dvb_notify, adap); + pvr2_dvb_notify, adap); ret = pvr2_stream_set_buffer_count(stream, PVR2_DVB_BUFFER_COUNT); if (ret < 0) return ret; diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index d195bd6a2248b..d608b793fa847 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -1033,8 +1033,10 @@ static int pvr2_v4l2_open(struct file *file) } -static void pvr2_v4l2_notify(struct pvr2_v4l2_fh *fhp) +static void pvr2_v4l2_notify(void *ptr) { + struct pvr2_v4l2_fh *fhp = ptr; + wake_up(&fhp->wait_data); } @@ -1067,7 +1069,7 @@ static int pvr2_v4l2_iosetup(struct pvr2_v4l2_fh *fh) hdw = fh->channel.mc_head->hdw; sp = fh->pdi->stream->stream; - pvr2_stream_set_callback(sp,(pvr2_stream_callback)pvr2_v4l2_notify,fh); + pvr2_stream_set_callback(sp, pvr2_v4l2_notify, fh); pvr2_hdw_set_stream_type(hdw,fh->pdi->config); if ((ret = pvr2_hdw_set_streaming(hdw,!0)) < 0) return ret; return pvr2_ioread_set_enabled(fh->rhp,!0); -- GitLab From 2101966b774499da993984a4b07c7c6c6ea45fdd Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 3 Jan 2024 21:20:18 +0100 Subject: [PATCH 1040/2290] clk: qcom: dispcc-sdm845: Adjust internal GDSC wait times [ Upstream commit 117e7dc697c2739d754db8fe0c1e2d4f1f5d5f82 ] SDM845 downstream uses non-default values for GDSC internal waits. Program them accordingly to avoid surprises. Fixes: 81351776c9fb ("clk: qcom: Add display clock controller driver for SDM845") Signed-off-by: Konrad Dybcio Tested-by: Caleb Connolly # OnePlus 6 Link: https://lore.kernel.org/r/20240103-topic-845gdsc-v1-1-368efbe1a61d@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sdm845.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/dispcc-sdm845.c b/drivers/clk/qcom/dispcc-sdm845.c index 735adfefc3798..e792e0b130d33 100644 --- a/drivers/clk/qcom/dispcc-sdm845.c +++ b/drivers/clk/qcom/dispcc-sdm845.c @@ -759,6 +759,8 @@ static struct clk_branch disp_cc_mdss_vsync_clk = { static struct gdsc mdss_gdsc = { .gdscr = 0x3000, + .en_few_wait_val = 0x6, + .en_rest_wait_val = 0x5, .pd = { .name = "mdss_gdsc", }, -- GitLab From 7a08ebcfd7213e948f77fc3f7245416ddb3132d4 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 15 Feb 2024 09:53:09 +0100 Subject: [PATCH 1041/2290] drm/mediatek: dsi: Fix DSI RGB666 formats and definitions [ Upstream commit fae6f815505301b92d9113764f4d76d0bfe45607 ] The register bits definitions for RGB666 formats are wrong in multiple ways: first, in the DSI_PS_SEL bits region, the Packed 18-bits RGB666 format is selected with bit 1, while the Loosely Packed one is bit 2, and second - the definition name "LOOSELY_PS_18BIT_RGB666" is wrong because the loosely packed format is 24 bits instead! Either way, functions mtk_dsi_ps_control_vact() and mtk_dsi_ps_control() do not even agree on the DSI_PS_SEL bit to set in DSI_PSCTRL: one sets loosely packed (24) on RGB666, the other sets packed (18), and the other way around for RGB666_PACKED. Fixing this entire stack of issues is done in one go: - Use the correct bit for the Loosely Packed RGB666 definition - Rename LOOSELY_PS_18BIT_RGB666 to LOOSELY_PS_24BIT_RGB666 - Change ps_bpp_mode in mtk_dsi_ps_control_vact() to set: - Loosely Packed, 24-bits for MIPI_DSI_FMT_RGB666 - Packed, 18-bits for MIPI_DSI_FMT_RGB666_PACKED Fixes: 2e54c14e310f ("drm/mediatek: Add DSI sub driver") Reviewed-by: Alexandre Mergnat Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20240215085316.56835-3-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_dsi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 3e74c7c1b89fa..d871b1dba083d 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -70,8 +70,8 @@ #define DSI_PS_WC 0x3fff #define DSI_PS_SEL (3 << 16) #define PACKED_PS_16BIT_RGB565 (0 << 16) -#define LOOSELY_PS_18BIT_RGB666 (1 << 16) -#define PACKED_PS_18BIT_RGB666 (2 << 16) +#define PACKED_PS_18BIT_RGB666 (1 << 16) +#define LOOSELY_PS_24BIT_RGB666 (2 << 16) #define PACKED_PS_24BIT_RGB888 (3 << 16) #define DSI_VSA_NL 0x20 @@ -366,10 +366,10 @@ static void mtk_dsi_ps_control_vact(struct mtk_dsi *dsi) ps_bpp_mode |= PACKED_PS_24BIT_RGB888; break; case MIPI_DSI_FMT_RGB666: - ps_bpp_mode |= PACKED_PS_18BIT_RGB666; + ps_bpp_mode |= LOOSELY_PS_24BIT_RGB666; break; case MIPI_DSI_FMT_RGB666_PACKED: - ps_bpp_mode |= LOOSELY_PS_18BIT_RGB666; + ps_bpp_mode |= PACKED_PS_18BIT_RGB666; break; case MIPI_DSI_FMT_RGB565: ps_bpp_mode |= PACKED_PS_16BIT_RGB565; @@ -423,7 +423,7 @@ static void mtk_dsi_ps_control(struct mtk_dsi *dsi) dsi_tmp_buf_bpp = 3; break; case MIPI_DSI_FMT_RGB666: - tmp_reg = LOOSELY_PS_18BIT_RGB666; + tmp_reg = LOOSELY_PS_24BIT_RGB666; dsi_tmp_buf_bpp = 3; break; case MIPI_DSI_FMT_RGB666_PACKED: -- GitLab From ce106d8ef0ba5f7e6fb21a11e0b1d23f145a0e9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Wedekind?= Date: Mon, 19 Feb 2024 14:28:11 +0100 Subject: [PATCH 1042/2290] PCI: Mark 3ware-9650SE Root Port Extended Tags as broken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit baf67aefbe7d7deafa59ca49612d163f8889934c ] Per PCIe r6.1, sec 2.2.6.2 and 7.5.3.4, a Requester may not use 8-bit Tags unless its Extended Tag Field Enable is set, but all Receivers/Completers must handle 8-bit Tags correctly regardless of their Extended Tag Field Enable. Some devices do not handle 8-bit Tags as Completers, so add a quirk for them. If we find such a device, we disable Extended Tags for the entire hierarchy to make peer-to-peer DMA possible. The 3ware 9650SE seems to have issues with handling 8-bit tags. Mark it as broken. This fixes PCI Parity Errors like : 3w-9xxx: scsi0: ERROR: (0x06:0x000C): PCI Parity Error: clearing. 3w-9xxx: scsi0: ERROR: (0x06:0x000D): PCI Abort: clearing. 3w-9xxx: scsi0: ERROR: (0x06:0x000E): Controller Queue Error: clearing. 3w-9xxx: scsi0: ERROR: (0x06:0x0010): Microcontroller Error: clearing. Link: https://lore.kernel.org/r/20240219132811.8351-1-joerg@wedekind.de Fixes: 60db3a4d8cc9 ("PCI: Enable PCIe Extended Tags if supported") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=202425 Signed-off-by: Jörg Wedekind Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 51d634fbdfb8e..c175b70a984c6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5415,6 +5415,7 @@ static void quirk_no_ext_tags(struct pci_dev *pdev) pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL); } +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_3WARE, 0x1004, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0132, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0141, quirk_no_ext_tags); -- GitLab From d183ee71366c7de1cb999b7573c1d6da2dc483cb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 10 Jan 2024 19:58:21 +0100 Subject: [PATCH 1043/2290] clk: hisilicon: hi3519: Release the correct number of gates in hi3519_clk_unregister() [ Upstream commit 74e39f526d95c0c119ada1874871ee328c59fbee ] The gates are stored in 'hi3519_gate_clks', not 'hi3519_mux_clks'. This is also in line with how hisi_clk_register_gate() is called in the probe. Fixes: 224b3b262c52 ("clk: hisilicon: hi3519: add driver remove path and fix some issues") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/c3f1877c9a0886fa35c949c8f0ef25547f284f18.1704912510.git.christophe.jaillet@wanadoo.fr Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/hisilicon/clk-hi3519.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/hisilicon/clk-hi3519.c b/drivers/clk/hisilicon/clk-hi3519.c index ad0c7f350cf03..60d8a27a90824 100644 --- a/drivers/clk/hisilicon/clk-hi3519.c +++ b/drivers/clk/hisilicon/clk-hi3519.c @@ -130,7 +130,7 @@ static void hi3519_clk_unregister(struct platform_device *pdev) of_clk_del_provider(pdev->dev.of_node); hisi_clk_unregister_gate(hi3519_gate_clks, - ARRAY_SIZE(hi3519_mux_clks), + ARRAY_SIZE(hi3519_gate_clks), crg->clk_data); hisi_clk_unregister_mux(hi3519_mux_clks, ARRAY_SIZE(hi3519_mux_clks), -- GitLab From e0b0d1c46a2ce1e46b79d004a7270fdef872e097 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 21 Jan 2024 16:16:24 +0100 Subject: [PATCH 1044/2290] clk: hisilicon: hi3559a: Fix an erroneous devm_kfree() [ Upstream commit 64c6a38136b74a2f18c42199830975edd9fbc379 ] 'p_clk' is an array allocated just before the for loop for all clk that need to be registered. It is incremented at each loop iteration. If a clk_register() call fails, 'p_clk' may point to something different from what should be freed. The best we can do, is to avoid this wrong release of memory. Fixes: 6c81966107dc ("clk: hisilicon: Add clock driver for hi3559A SoC") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/773fc8425c3b8f5b0ca7c1d89f15b65831a85ca9.1705850155.git.christophe.jaillet@wanadoo.fr Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/hisilicon/clk-hi3559a.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c index 9ea1a80acbe8b..0272276550ff1 100644 --- a/drivers/clk/hisilicon/clk-hi3559a.c +++ b/drivers/clk/hisilicon/clk-hi3559a.c @@ -491,7 +491,6 @@ static void hisi_clk_register_pll(struct hi3559av100_pll_clock *clks, clk = clk_register(NULL, &p_clk->hw); if (IS_ERR(clk)) { - devm_kfree(dev, p_clk); dev_err(dev, "%s: failed to register clock %s\n", __func__, clks[i].name); continue; -- GitLab From 527480cab5ccd85beb355f7ac4a27903eabc7348 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 15 Dec 2023 12:33:55 +0300 Subject: [PATCH 1045/2290] drm/tegra: put drm_gem_object ref on error in tegra_fb_create [ Upstream commit 32e5a120a5105bce01561978ee55aee8e40ac0dc ] Inside tegra_fb_create(), drm_gem_object_lookup() increments ref count of the found object. But if the following size check fails then the last found object's ref count should be put there as the unreferencing loop can't detect this situation. Found by Linux Verification Center (linuxtesting.org). Fixes: de2ba664c30f ("gpu: host1x: drm: Add memory manager and fb") Signed-off-by: Fedor Pchelkin Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20231215093356.12067-1-pchelkin@ispras.ru Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 9291209154a7a..a688ecf08451e 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -166,6 +166,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, if (gem->size < size) { err = -EINVAL; + drm_gem_object_put(gem); goto unreference; } -- GitLab From 3eb47e41c2057fb6db5f73a60f35705450d80c08 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 20 Feb 2024 11:50:10 +0000 Subject: [PATCH 1046/2290] mfd: syscon: Call of_node_put() only when of_parse_phandle() takes a ref [ Upstream commit d2b0680cf3b05490b579e71b0df6e07451977745 ] of_parse_phandle() returns a device_node with refcount incremented, which the callee needs to call of_node_put() on when done. We should only call of_node_put() when the property argument is provided though as otherwise nothing has taken a reference on the node. Fixes: 45330bb43421 ("mfd: syscon: Allow property as NULL in syscon_regmap_lookup_by_phandle") Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20240220115012.471689-2-peter.griffin@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/syscon.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 6196724ef39bb..ecfe151220919 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -223,7 +223,9 @@ struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, return ERR_PTR(-ENODEV); regmap = syscon_node_to_regmap(syscon_np); - of_node_put(syscon_np); + + if (property) + of_node_put(syscon_np); return regmap; } -- GitLab From 6be122c8d2151c5db0866fd64cbb893ca45bb5ab Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 20 Feb 2024 11:50:12 +0000 Subject: [PATCH 1047/2290] mfd: altera-sysmgr: Call of_node_put() only when of_parse_phandle() takes a ref [ Upstream commit e28c28a34ee9fa2ea671a20e5e7064e6220d55e7 ] of_parse_phandle() returns a device_node with refcount incremented, which the callee needs to call of_node_put() on when done. We should only call of_node_put() when the property argument is provided though as otherwise nothing has taken a reference on the node. Fixes: f36e789a1f8d ("mfd: altera-sysmgr: Add SOCFPGA System Manager") Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20240220115012.471689-4-peter.griffin@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/altera-sysmgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 5d3715a28b28e..dbe1009943718 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -110,7 +110,9 @@ struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np, dev = driver_find_device_by_of_node(&altr_sysmgr_driver.driver, (void *)sysmgr_np); - of_node_put(sysmgr_np); + if (property) + of_node_put(sysmgr_np); + if (!dev) return ERR_PTR(-EPROBE_DEFER); -- GitLab From 6a2ef97250681a5b104396bcc1e4a67f1b25ec28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 14:49:46 +0100 Subject: [PATCH 1048/2290] crypto: arm/sha - fix function cast warnings [ Upstream commit 53cc9baeb9bc2a187eb9c9790d30995148852b12 ] clang-16 warns about casting between incompatible function types: arch/arm/crypto/sha256_glue.c:37:5: error: cast from 'void (*)(u32 *, const void *, unsigned int)' (aka 'void (*)(unsigned int *, const void *, unsigned int)') to 'sha256_block_fn *' (aka 'void (*)(struct sha256_state *, const unsigned char *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 37 | (sha256_block_fn *)sha256_block_data_order); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm/crypto/sha512-glue.c:34:3: error: cast from 'void (*)(u64 *, const u8 *, int)' (aka 'void (*)(unsigned long long *, const unsigned char *, int)') to 'sha512_block_fn *' (aka 'void (*)(struct sha512_state *, const unsigned char *, int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 34 | (sha512_block_fn *)sha512_block_data_order); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix the prototypes for the assembler functions to match the typedef. The code already relies on the digest being the first part of the state structure, so there is no change in behavior. Fixes: c80ae7ca3726 ("crypto: arm/sha512 - accelerated SHA-512 using ARM generic ASM and NEON") Fixes: b59e2ae3690c ("crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer") Signed-off-by: Arnd Bergmann Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/arm/crypto/sha256_glue.c | 13 +++++-------- arch/arm/crypto/sha512-glue.c | 12 +++++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index 433ee4ddce6c8..f85933fdec75f 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -24,8 +24,8 @@ #include "sha256_glue.h" -asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); +asmlinkage void sha256_block_data_order(struct sha256_state *state, + const u8 *data, int num_blks); int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -33,23 +33,20 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, /* make sure casting to sha256_block_fn() is safe */ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); - return sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + return sha256_base_do_update(desc, data, len, sha256_block_data_order); } EXPORT_SYMBOL(crypto_sha256_arm_update); static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out) { - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_finalize(desc, sha256_block_data_order); return sha256_base_finish(desc, out); } int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_update(desc, data, len, sha256_block_data_order); return crypto_sha256_arm_final(desc, out); } EXPORT_SYMBOL(crypto_sha256_arm_finup); diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 0635a65aa488b..1be5bd498af36 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -25,27 +25,25 @@ MODULE_ALIAS_CRYPTO("sha512"); MODULE_ALIAS_CRYPTO("sha384-arm"); MODULE_ALIAS_CRYPTO("sha512-arm"); -asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks); +asmlinkage void sha512_block_data_order(struct sha512_state *state, + u8 const *src, int blocks); int sha512_arm_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + return sha512_base_do_update(desc, data, len, sha512_block_data_order); } static int sha512_arm_final(struct shash_desc *desc, u8 *out) { - sha512_base_do_finalize(desc, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_finalize(desc, sha512_block_data_order); return sha512_base_finish(desc, out); } int sha512_arm_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_update(desc, data, len, sha512_block_data_order); return sha512_arm_final(desc, out); } -- GitLab From 67355a760bf52c8623b8242d845cf20db4517eb4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 17 Feb 2024 08:55:13 -0800 Subject: [PATCH 1049/2290] crypto: jitter - fix CRYPTO_JITTERENTROPY help text [ Upstream commit e63df1ec9a16dd9e13e9068243e64876de06f795 ] Correct various small problems in the help text: a. change 2 spaces to ", " b. finish an incomplete sentence c. change non-working URL to working URL Fixes: a9a98d49da52 ("crypto: Kconfig - simplify compression/RNG entries") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218458 Signed-off-by: Randy Dunlap Cc: Bagas Sanjaya Cc: Robert Elliott Cc: Christoph Biedl Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Acked-by: Bagas Sanjaya Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index d779667671b23..edf193aff23e7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1285,10 +1285,11 @@ config CRYPTO_JITTERENTROPY A non-physical non-deterministic ("true") RNG (e.g., an entropy source compliant with NIST SP800-90B) intended to provide a seed to a - deterministic RNG (e.g. per NIST SP800-90C). + deterministic RNG (e.g., per NIST SP800-90C). This RNG does not perform any cryptographic whitening of the generated + random numbers. - See https://www.chronox.de/jent.html + See https://www.chronox.de/jent/ config CRYPTO_KDF800108_CTR tristate -- GitLab From c82d434069733ee19f8fa7271e3f7cc505620219 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 13 Feb 2024 10:16:36 +0200 Subject: [PATCH 1050/2290] drm/tidss: Fix initial plane zpos values [ Upstream commit 3ec948ccb2c4b99e8fbfdd950adbe92ea577b395 ] When the driver sets up the zpos property it sets the default zpos value to the HW id of the plane. That is fine as such, but as on many DSS versions the driver arranges the DRM planes in a different order than the HW planes (to keep the non-scalable planes first), this leads to odd initial zpos values. An example is J721e, where the initial zpos values for DRM planes are 1, 3, 0, 2. In theory the userspace should configure the zpos values properly when using multiple planes, and in that sense the initial zpos values shouldn't matter, but there's really no reason not to fix this and help the userspace apps which don't handle zpos perfectly. In particular, some versions of Weston seem to have issues dealing with the planes with the current default zpos values. So let's change the zpos values for the DRM planes to 0, 1, 2, 3. Another option would be to configure the planes marked as primary planes to zpos 0. On a two display system this would give us plane zpos values of 0, 0, 1, 2. The end result and behavior would be very similar in this option, and I'm not aware that this would actually help us in any way. So, to keep the code simple, I opted for the 0, 1, 2, 3 values. Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem") Reviewed-by: Aradhya Bhatia Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20240213-tidss-fixes-v1-1-d709e8dfa505@ideasonboard.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tidss/tidss_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index 42d50ec5526d7..435b3b66ae632 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -211,7 +211,7 @@ struct tidss_plane *tidss_plane_create(struct tidss_device *tidss, drm_plane_helper_add(&tplane->plane, &tidss_plane_helper_funcs); - drm_plane_create_zpos_property(&tplane->plane, hw_plane_id, 0, + drm_plane_create_zpos_property(&tplane->plane, tidss->num_planes, 0, num_planes - 1); ret = drm_plane_create_color_properties(&tplane->plane, -- GitLab From 82120c9ab4b8edc5d53bc63a693e15c4d1bac5e1 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 13 Feb 2024 10:16:37 +0200 Subject: [PATCH 1051/2290] drm/tidss: Fix sync-lost issue with two displays [ Upstream commit c079e2e113f2ec2803ba859bbb442a6ab82c96bd ] A sync lost issue can be observed with two displays, when moving a plane from one disabled display to an another disabled display, and then enabling the display to which the plane was moved to. The exact requirements for this to trigger are not clear. It looks like the issue is that the layers are left enabled in the first display's OVR registers. Even if the corresponding VP is disabled, it still causes an issue, as if the disabled VP and its OVR would still be in use, leading to the same VID being used by two OVRs. However, this is just speculation based on testing the DSS behavior. Experimentation shows that as a workaround, we can disable all the layers in the OVR when disabling a VP. There should be no downside to this, as the OVR is anyway effectively disabled if its VP is disabled, and it seems to solve the sync lost issue. However, there may be a bigger issue in play here, related to J721e erratum i2097 ("DSS: Disabling a Layer Connected to Overlay May Result in Synclost During the Next Frame"). Experimentation also shows that the OVR's CHANNELIN field has similar issue. So we may need to revisit this when we find out more about the core issue. Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem") Reviewed-by: Aradhya Bhatia Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20240213-tidss-fixes-v1-2-d709e8dfa505@ideasonboard.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tidss/tidss_crtc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index cb66a425dd200..896a77853ebc5 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -270,6 +270,16 @@ static void tidss_crtc_atomic_disable(struct drm_crtc *crtc, reinit_completion(&tcrtc->framedone_completion); + /* + * If a layer is left enabled when the videoport is disabled, and the + * vid pipeline that was used for the layer is taken into use on + * another videoport, the DSS will report sync lost issues. Disable all + * the layers here as a work-around. + */ + for (u32 layer = 0; layer < tidss->feat->num_planes; layer++) + dispc_ovr_enable_layer(tidss->dispc, tcrtc->hw_videoport, layer, + false); + dispc_vp_disable(tidss->dispc, tcrtc->hw_videoport); if (!wait_for_completion_timeout(&tcrtc->framedone_completion, -- GitLab From 438adcd017dc67782c8493ac3436b506798f2a6b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 8 Feb 2024 12:34:18 +0200 Subject: [PATCH 1052/2290] mtd: maps: physmap-core: fix flash size larger than 32-bit [ Upstream commit 3884f03edd34887514a0865a80769cd5362d5c3b ] mtd-ram can potentially be larger than 4GB. get_bitmask_order() uses fls() that is not guaranteed to work with values larger than 32-bit. Specifically on aarch64 fls() returns 0 when all 32 LSB bits are clear. Use fls64() instead. Fixes: ba32ce95cbd987 ("mtd: maps: Merge gpio-addr-flash.c into physmap-core.c") Signed-off-by: Baruch Siach Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/9fbf3664ce00f8b07867f1011834015f21d162a5.1707388458.git.baruch@tkos.co.il Signed-off-by: Sasha Levin --- drivers/mtd/maps/physmap-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 19dad5a23f944..8cdb3512107d3 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -524,7 +524,7 @@ static int physmap_flash_probe(struct platform_device *dev) if (!info->maps[i].phys) info->maps[i].phys = res->start; - info->win_order = get_bitmask_order(resource_size(res)) - 1; + info->win_order = fls64(resource_size(res)) - 1; info->maps[i].size = BIT(info->win_order + (info->gpios ? info->gpios->ndescs : 0)); -- GitLab From 5ac2ca7a50ed4b8df6dad6c345724862c1ec0f6e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:00:09 +0100 Subject: [PATCH 1053/2290] mtd: rawnand: lpc32xx_mlc: fix irq handler prototype [ Upstream commit 347b828882e6334690e7003ce5e2fe5f233dc508 ] clang-16 warns about mismatched function prototypes: drivers/mtd/nand/raw/lpc32xx_mlc.c:783:29: error: cast from 'irqreturn_t (*)(int, struct lpc32xx_nand_host *)' (aka 'enum irqreturn (*)(int, struct lpc32xx_nand_host *)') to 'irq_handler_t' (aka 'enum irqreturn (*)(int, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] Change the interrupt handler to the normal way of just passing a void* pointer and converting it inside the function.. Fixes: 70f7cb78ec53 ("mtd: add LPC32xx MLC NAND driver") Signed-off-by: Arnd Bergmann Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240213100146.455811-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/lpc32xx_mlc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c index 452ecaf7775ac..1cfe3dd0bad4d 100644 --- a/drivers/mtd/nand/raw/lpc32xx_mlc.c +++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c @@ -303,8 +303,9 @@ static int lpc32xx_nand_device_ready(struct nand_chip *nand_chip) return 0; } -static irqreturn_t lpc3xxx_nand_irq(int irq, struct lpc32xx_nand_host *host) +static irqreturn_t lpc3xxx_nand_irq(int irq, void *data) { + struct lpc32xx_nand_host *host = data; uint8_t sr; /* Clear interrupt flag by reading status */ @@ -779,7 +780,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) goto release_dma_chan; } - if (request_irq(host->irq, (irq_handler_t)&lpc3xxx_nand_irq, + if (request_irq(host->irq, &lpc3xxx_nand_irq, IRQF_TRIGGER_HIGH, DRV_NAME, host)) { dev_err(&pdev->dev, "Error requesting NAND IRQ\n"); res = -ENXIO; -- GitLab From 83ee64af9bedccd3a18b90f6c2f637640a799c1a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 23 Feb 2024 18:51:07 +0100 Subject: [PATCH 1054/2290] ASoC: meson: axg-tdm-interface: fix mclk setup without mclk-fs [ Upstream commit e3741a8d28a1137f8b19ae6f3d6e3be69a454a0a ] By default, when mclk-fs is not provided, the tdm-interface driver requests an MCLK that is 4x the bit clock, SCLK. However there is no justification for this: * If the codec needs MCLK for its operation, mclk-fs is expected to be set according to the codec requirements. * If the codec does not need MCLK the minimum is 2 * SCLK, because this is minimum the divider between SCLK and MCLK can do. Multiplying by 4 may cause problems because the PLL limit may be reached sooner than it should, so use 2x instead. Fixes: d60e4f1e4be5 ("ASoC: meson: add tdm interface driver") Signed-off-by: Jerome Brunet Link: https://msgid.link/r/20240223175116.2005407-2-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-tdm-interface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index c040c83637e02..eb188ee950557 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -266,8 +266,8 @@ static int axg_tdm_iface_set_sclk(struct snd_soc_dai *dai, srate = iface->slots * iface->slot_width * params_rate(params); if (!iface->mclk_rate) { - /* If no specific mclk is requested, default to bit clock * 4 */ - clk_set_rate(iface->mclk, 4 * srate); + /* If no specific mclk is requested, default to bit clock * 2 */ + clk_set_rate(iface->mclk, 2 * srate); } else { /* Check if we can actually get the bit clock from mclk */ if (iface->mclk_rate % srate) { -- GitLab From ff4dd05a9415560128c723abff06a77ba74d92c1 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 23 Feb 2024 18:51:08 +0100 Subject: [PATCH 1055/2290] ASoC: meson: axg-tdm-interface: add frame rate constraint [ Upstream commit 59c6a3a43b221cc2a211181b1298e43b2c2df782 ] According to Amlogic datasheets for the SoCs supported by this driver, the maximum bit clock rate is 100MHz. The tdm interface allows the rates listed by the DAI driver, regardless of the number slots or their width. However, these will impact the bit clock rate. Hitting the 100MHz limit is very unlikely for most use cases but it is possible. For example with 32 slots / 32 bits wide, the maximum rate is no longer 384kHz but ~96kHz. Add the constraint accordingly if the component is not already active. If it is active, the rate is already constrained by the first stream rate. Fixes: d60e4f1e4be5 ("ASoC: meson: add tdm interface driver") Signed-off-by: Jerome Brunet Link: https://msgid.link/r/20240223175116.2005407-3-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-tdm-interface.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index eb188ee950557..028383f949efd 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -12,6 +12,9 @@ #include "axg-tdm.h" +/* Maximum bit clock frequency according the datasheets */ +#define MAX_SCLK 100000000 /* Hz */ + enum { TDM_IFACE_PAD, TDM_IFACE_LOOPBACK, @@ -155,19 +158,27 @@ static int axg_tdm_iface_startup(struct snd_pcm_substream *substream, return -EINVAL; } - /* Apply component wide rate symmetry */ if (snd_soc_component_active(dai->component)) { + /* Apply component wide rate symmetry */ ret = snd_pcm_hw_constraint_single(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, iface->rate); - if (ret < 0) { - dev_err(dai->dev, - "can't set iface rate constraint\n"); - return ret; - } + + } else { + /* Limit rate according to the slot number and width */ + unsigned int max_rate = + MAX_SCLK / (iface->slots * iface->slot_width); + ret = snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + 0, max_rate); } - return 0; + if (ret < 0) + dev_err(dai->dev, "can't set iface rate constraint\n"); + else + ret = 0; + + return ret; } static int axg_tdm_iface_set_stream(struct snd_pcm_substream *substream, -- GitLab From c6a05e45de82e3481f8f8ac4d3e655706fcae74a Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 14 Feb 2024 20:11:41 +0530 Subject: [PATCH 1056/2290] HID: amd_sfh: Update HPD sensor structure elements [ Upstream commit bbf0dec30696638b8bdc28cb2f5bf23f8d760b52 ] HPD sensor data is not populating properly because of wrong order of HPD sensor structure elements. So update the order of structure elements to match the HPD sensor data received from the firmware. Fixes: 24a31ea94922 ("HID: amd_sfh: Add initial support for HPD sensor") Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index dfb7cabd82efe..2b125cd9742cb 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -89,10 +89,10 @@ enum mem_use_type { struct hpd_status { union { struct { - u32 human_presence_report : 4; - u32 human_presence_actual : 4; - u32 probablity : 8; u32 object_distance : 16; + u32 probablity : 8; + u32 human_presence_actual : 4; + u32 human_presence_report : 4; } shpd; u32 val; }; -- GitLab From fcb7e704827e14510dc365f19782579fbafcaad2 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 14 Feb 2024 20:11:42 +0530 Subject: [PATCH 1057/2290] HID: amd_sfh: Avoid disabling the interrupt [ Upstream commit c1db0073212ef39d5a46c2aea5e49bf884375ce4 ] HP ProBook x360 435 G7 using older version of firmware which doesn't support disabling the interrupt for all commands. Hence avoid disabling the interrupt for that particular model. Link: https://bugzilla.kernel.org/show_bug.cgi?id=218104 Fixes: b300667b33b2 ("HID: amd_sfh: Disable the interrupt for all command") Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 30 +++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index c936d6a51c0cd..9c963ad27f9d1 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -34,6 +34,8 @@ static int sensor_mask_override = -1; module_param_named(sensor_mask, sensor_mask_override, int, 0444); MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask"); +static bool intr_disable = true; + static int amd_sfh_wait_response_v2(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts) { union cmd_response cmd_resp; @@ -54,7 +56,7 @@ static void amd_start_sensor_v2(struct amd_mp2_dev *privdata, struct amd_mp2_sen cmd_base.ul = 0; cmd_base.cmd_v2.cmd_id = ENABLE_SENSOR; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = info.period; cmd_base.cmd_v2.sensor_id = info.sensor_idx; cmd_base.cmd_v2.length = 16; @@ -72,7 +74,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx) cmd_base.ul = 0; cmd_base.cmd_v2.cmd_id = DISABLE_SENSOR; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = 0; cmd_base.cmd_v2.sensor_id = sensor_idx; cmd_base.cmd_v2.length = 16; @@ -86,7 +88,7 @@ static void amd_stop_all_sensor_v2(struct amd_mp2_dev *privdata) union sfh_cmd_base cmd_base; cmd_base.cmd_v2.cmd_id = STOP_ALL_SENSORS; - cmd_base.cmd_v2.intr_disable = 1; + cmd_base.cmd_v2.intr_disable = intr_disable; cmd_base.cmd_v2.period = 0; cmd_base.cmd_v2.sensor_id = 0; @@ -288,6 +290,26 @@ int amd_sfh_irq_init(struct amd_mp2_dev *privdata) return 0; } +static int mp2_disable_intr(const struct dmi_system_id *id) +{ + intr_disable = false; + return 0; +} + +static const struct dmi_system_id dmi_sfh_table[] = { + { + /* + * https://bugzilla.kernel.org/show_bug.cgi?id=218104 + */ + .callback = mp2_disable_intr, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook x360 435 G7"), + }, + }, + {} +}; + static const struct dmi_system_id dmi_nodevs[] = { { /* @@ -311,6 +333,8 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i if (dmi_first_match(dmi_nodevs)) return -ENODEV; + dmi_check_system(dmi_sfh_table); + privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL); if (!privdata) return -ENOMEM; -- GitLab From 0a70199742c25694862b9ac09d0cb9d5e33de6fc Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 24 Feb 2024 07:48:52 +0530 Subject: [PATCH 1058/2290] drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7cf1ad2fe10634238b38442a851d89514cb14ea2 ] Missing break statement in the ATOM_ARG_IMM case of a switch statement, adds the missing break statement, ensuring that the program's control flow is as intended. Fixes the below: drivers/gpu/drm/amd/amdgpu/atom.c:323 atom_get_src_int() warn: ignoring unreachable code. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Cc: Jammy Zhou Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 1c5d9388ad0bb..cb6eb47aab65b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -313,7 +313,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, DEBUG("IMM 0x%02X\n", val); return val; } - return 0; + break; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; -- GitLab From 3a1ec89708d2e57e2712f46241282961b1a7a475 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 16 Feb 2024 15:30:47 +0800 Subject: [PATCH 1059/2290] media: pvrusb2: fix uaf in pvr2_context_set_notify [ Upstream commit 0a0b79ea55de8514e1750884e5fec77f9fdd01ee ] [Syzbot reported] BUG: KASAN: slab-use-after-free in pvr2_context_set_notify+0x2c4/0x310 drivers/media/usb/pvrusb2/pvrusb2-context.c:35 Read of size 4 at addr ffff888113aeb0d8 by task kworker/1:1/26 CPU: 1 PID: 26 Comm: kworker/1:1 Not tainted 6.8.0-rc1-syzkaller-00046-gf1a27f081c1f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc4/0x620 mm/kasan/report.c:488 kasan_report+0xda/0x110 mm/kasan/report.c:601 pvr2_context_set_notify+0x2c4/0x310 drivers/media/usb/pvrusb2/pvrusb2-context.c:35 pvr2_context_notify drivers/media/usb/pvrusb2/pvrusb2-context.c:95 [inline] pvr2_context_disconnect+0x94/0xb0 drivers/media/usb/pvrusb2/pvrusb2-context.c:272 Freed by task 906: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3f/0x60 mm/kasan/generic.c:640 poison_slab_object mm/kasan/common.c:241 [inline] __kasan_slab_free+0x106/0x1b0 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kfree+0x105/0x340 mm/slub.c:4409 pvr2_context_check drivers/media/usb/pvrusb2/pvrusb2-context.c:137 [inline] pvr2_context_thread_func+0x69d/0x960 drivers/media/usb/pvrusb2/pvrusb2-context.c:158 [Analyze] Task A set disconnect_flag = !0, which resulted in Task B's condition being met and releasing mp, leading to this issue. [Fix] Place the disconnect_flag assignment operation after all code in pvr2_context_disconnect() to avoid this issue. Reported-and-tested-by: syzbot+ce750e124675d4599449@syzkaller.appspotmail.com Fixes: e5be15c63804 ("V4L/DVB (7711): pvrusb2: Fix race on module unload") Signed-off-by: Edward Adam Davis Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-context.c b/drivers/media/usb/pvrusb2/pvrusb2-context.c index 58f2f3ff10ee2..73c95ba2328a4 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-context.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-context.c @@ -267,9 +267,9 @@ static void pvr2_context_exit(struct pvr2_context *mp) void pvr2_context_disconnect(struct pvr2_context *mp) { pvr2_hdw_disconnect(mp->hdw); - mp->disconnect_flag = !0; if (!pvr2_context_shutok()) pvr2_context_notify(mp); + mp->disconnect_flag = !0; } -- GitLab From 107052a8cfeff3a97326277192b4f052e4860a8a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2024 17:31:44 +0100 Subject: [PATCH 1060/2290] media: dvb-frontends: avoid stack overflow warnings with clang [ Upstream commit 7a4cf27d1f0538f779bf31b8c99eda394e277119 ] A previous patch worked around a KASAN issue in stv0367, now a similar problem showed up with clang: drivers/media/dvb-frontends/stv0367.c:1222:12: error: stack frame size (3624) exceeds limit (2048) in 'stv0367ter_set_frontend' [-Werror,-Wframe-larger-than] 1214 | static int stv0367ter_set_frontend(struct dvb_frontend *fe) Rework the stv0367_writereg() function to be simpler and mark both register access functions as noinline_for_stack so the temporary i2c_msg structures do not get duplicated on the stack when KASAN_STACK is enabled. Fixes: 3cd890dbe2a4 ("media: dvb-frontends: fix i2c access helpers for KASAN") Signed-off-by: Arnd Bergmann Reviewed-by: Justin Stitt Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/stv0367.c | 34 +++++++-------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index 04556b77c16c9..0977564a4a1a4 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -118,50 +118,32 @@ static const s32 stv0367cab_RF_LookUp2[RF_LOOKUP_TABLE2_SIZE][RF_LOOKUP_TABLE2_S } }; -static -int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) +static noinline_for_stack +int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - u8 buf[MAX_XFER_SIZE]; + u8 buf[3] = { MSB(reg), LSB(reg), data }; struct i2c_msg msg = { .addr = state->config->demod_address, .flags = 0, .buf = buf, - .len = len + 2 + .len = 3, }; int ret; - if (2 + len > sizeof(buf)) { - printk(KERN_WARNING - "%s: i2c wr reg=%04x: len=%d is too big!\n", - KBUILD_MODNAME, reg, len); - return -EINVAL; - } - - - buf[0] = MSB(reg); - buf[1] = LSB(reg); - memcpy(buf + 2, data, len); - if (i2cdebug) printk(KERN_DEBUG "%s: [%02x] %02x: %02x\n", __func__, - state->config->demod_address, reg, buf[2]); + state->config->demod_address, reg, data); ret = i2c_transfer(state->i2c, &msg, 1); if (ret != 1) printk(KERN_ERR "%s: i2c write error! ([%02x] %02x: %02x)\n", - __func__, state->config->demod_address, reg, buf[2]); + __func__, state->config->demod_address, reg, data); return (ret != 1) ? -EREMOTEIO : 0; } -static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) -{ - u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ - - return stv0367_writeregs(state, reg, &tmp, 1); -} - -static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) +static noinline_for_stack +u8 stv0367_readreg(struct stv0367_state *state, u16 reg) { u8 b0[] = { 0, 0 }; u8 b1[] = { 0 }; -- GitLab From e04d15c8bb3e111dd69f98894acd92d63e87aac3 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 21 Feb 2024 12:37:13 +0800 Subject: [PATCH 1061/2290] media: go7007: fix a memleak in go7007_load_encoder [ Upstream commit b9b683844b01d171a72b9c0419a2d760d946ee12 ] In go7007_load_encoder, bounce(i.e. go->boot_fw), is allocated without a deallocation thereafter. After the following call chain: saa7134_go7007_init |-> go7007_boot_encoder |-> go7007_load_encoder |-> kfree(go) go is freed and thus bounce is leaked. Fixes: 95ef39403f89 ("[media] go7007: remember boot firmware") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/go7007/go7007-driver.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/go7007/go7007-driver.c b/drivers/media/usb/go7007/go7007-driver.c index 0c24e29843048..eb03f98b2ef11 100644 --- a/drivers/media/usb/go7007/go7007-driver.c +++ b/drivers/media/usb/go7007/go7007-driver.c @@ -80,7 +80,7 @@ static int go7007_load_encoder(struct go7007 *go) const struct firmware *fw_entry; char fw_name[] = "go7007/go7007fw.bin"; void *bounce; - int fw_len, rv = 0; + int fw_len; u16 intr_val, intr_data; if (go->boot_fw == NULL) { @@ -109,9 +109,11 @@ static int go7007_load_encoder(struct go7007 *go) go7007_read_interrupt(go, &intr_val, &intr_data) < 0 || (intr_val & ~0x1) != 0x5a5a) { v4l2_err(go, "error transferring firmware\n"); - rv = -1; + kfree(go->boot_fw); + go->boot_fw = NULL; + return -1; } - return rv; + return 0; } MODULE_FIRMWARE("go7007/go7007fw.bin"); -- GitLab From 55ca0c7eae8499bb96f4e5d9b26af95e89c4e6a0 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 21 Feb 2024 13:17:04 +0800 Subject: [PATCH 1062/2290] media: ttpci: fix two memleaks in budget_av_attach [ Upstream commit d0b07f712bf61e1a3cf23c87c663791c42e50837 ] When saa7146_register_device and saa7146_vv_init fails, budget_av_attach should free the resources it allocates, like the error-handling of ttpci_budget_init does. Besides, there are two fixme comment refers to such deallocations. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhipeng Lu Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../staging/media/deprecated/saa7146/ttpci/budget-av.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c b/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c index 0c61a2dec2211..81fc4835679f3 100644 --- a/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c +++ b/drivers/staging/media/deprecated/saa7146/ttpci/budget-av.c @@ -1462,7 +1462,8 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio budget_av->has_saa7113 = 1; err = saa7146_vv_init(dev, &vv_data); if (err != 0) { - /* fixme: proper cleanup here */ + ttpci_budget_deinit(&budget_av->budget); + kfree(budget_av); ERR("cannot init vv subsystem\n"); return err; } @@ -1471,9 +1472,10 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio vv_data.vid_ops.vidioc_s_input = vidioc_s_input; if ((err = saa7146_register_device(&budget_av->vd, dev, "knc1", VFL_TYPE_VIDEO))) { - /* fixme: proper cleanup here */ - ERR("cannot register capture v4l2 device\n"); saa7146_vv_release(dev); + ttpci_budget_deinit(&budget_av->budget); + kfree(budget_av); + ERR("cannot register capture v4l2 device\n"); return err; } -- GitLab From 32bfbab72aaa16aa08cb058d96a4d9afd3d5037f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 24 Feb 2024 13:10:22 +0100 Subject: [PATCH 1063/2290] media: mediatek: vcodec: avoid -Wcast-function-type-strict warning [ Upstream commit bfb1b99802ef16045402deb855c197591dc78886 ] The ipi handler here tries hard to maintain const-ness of its argument, but by doing that causes a warning about function type casts: drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c:38:32: error: cast from 'mtk_vcodec_ipi_handler' (aka 'void (*)(void *, unsigned int, void *)') to 'ipi_handler_t' (aka 'void (*)(const void *, unsigned int, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 38 | ipi_handler_t handler_const = (ipi_handler_t)handler; | ^~~~~~~~~~~~~~~~~~~~~~ Remove the hack and just use a non-const argument. Fixes: bf1d556ad4e0 ("media: mtk-vcodec: abstract firmware interface") Signed-off-by: Arnd Bergmann Reviewed-by: Ricardo Ribalda Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c | 2 +- .../media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c | 10 +--------- drivers/media/platform/mediatek/vpu/mtk_vpu.c | 2 +- drivers/media/platform/mediatek/vpu/mtk_vpu.h | 2 +- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c b/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c index b065ccd069140..378a1cba0144f 100644 --- a/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c +++ b/drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c @@ -26,7 +26,7 @@ static void mtk_mdp_vpu_handle_init_ack(const struct mdp_ipi_comm_ack *msg) vpu->inst_addr = msg->vpu_inst_addr; } -static void mtk_mdp_vpu_ipi_handler(const void *data, unsigned int len, +static void mtk_mdp_vpu_ipi_handler(void *data, unsigned int len, void *priv) { const struct mdp_ipi_comm_ack *msg = data; diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c index cfc7ebed8fb7a..1ec29f1b163a1 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_fw_vpu.c @@ -29,15 +29,7 @@ static int mtk_vcodec_vpu_set_ipi_register(struct mtk_vcodec_fw *fw, int id, mtk_vcodec_ipi_handler handler, const char *name, void *priv) { - /* - * The handler we receive takes a void * as its first argument. We - * cannot change this because it needs to be passed down to the rproc - * subsystem when SCP is used. VPU takes a const argument, which is - * more constrained, so the conversion below is safe. - */ - ipi_handler_t handler_const = (ipi_handler_t)handler; - - return vpu_ipi_register(fw->pdev, id, handler_const, name, priv); + return vpu_ipi_register(fw->pdev, id, handler, name, priv); } static int mtk_vcodec_vpu_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf, diff --git a/drivers/media/platform/mediatek/vpu/mtk_vpu.c b/drivers/media/platform/mediatek/vpu/mtk_vpu.c index 6beab9e86a22a..44adf5cfc9bb2 100644 --- a/drivers/media/platform/mediatek/vpu/mtk_vpu.c +++ b/drivers/media/platform/mediatek/vpu/mtk_vpu.c @@ -635,7 +635,7 @@ OUT_LOAD_FW: } EXPORT_SYMBOL_GPL(vpu_load_firmware); -static void vpu_init_ipi_handler(const void *data, unsigned int len, void *priv) +static void vpu_init_ipi_handler(void *data, unsigned int len, void *priv) { struct mtk_vpu *vpu = priv; const struct vpu_run *run = data; diff --git a/drivers/media/platform/mediatek/vpu/mtk_vpu.h b/drivers/media/platform/mediatek/vpu/mtk_vpu.h index a56053ff135af..da05f3e740810 100644 --- a/drivers/media/platform/mediatek/vpu/mtk_vpu.h +++ b/drivers/media/platform/mediatek/vpu/mtk_vpu.h @@ -17,7 +17,7 @@ * VPU interfaces with other blocks by share memory and interrupt. */ -typedef void (*ipi_handler_t) (const void *data, +typedef void (*ipi_handler_t) (void *data, unsigned int len, void *priv); -- GitLab From e9eee1ce1b6cb44e0bf625533d8a21f58d3780e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 28 Feb 2024 12:28:03 +0100 Subject: [PATCH 1064/2290] gpio: nomadik: fix offset bug in nmk_pmx_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 53cf6b72e074864b94ade97dcb6f30b5ac1a82dc ] Previously, the statement looked like: slpm[x] &= ~BIT(g->grp.pins[i]); Where: - slpm is a unsigned int pointer; - g->grp.pins[i] is a pin number. It can grow to more than 32. The expected shift amount is a pin bank offset. This bug does not occur on every group or pin: the altsetting must be NMK_GPIO_ALT_C and the pin must be 32 or above. It might have occured. For example, in pinctrl-nomadik-db8500.c, pin group i2c3_c_2 has the right altsetting and pins 229 and 230. Fixes: dbfe8ca259e1 ("pinctrl/nomadik: implement pin multiplexing") Reviewed-by: Linus Walleij Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20240228-mbly-gpio-v2-5-3ba757474006@bootlin.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index f7d02513d8cc1..e79037dc85796 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1571,8 +1571,10 @@ static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function, * Then mask the pins that need to be sleeping now when we're * switching to the ALT C function. */ - for (i = 0; i < g->grp.npins; i++) - slpm[g->grp.pins[i] / NMK_GPIO_PER_CHIP] &= ~BIT(g->grp.pins[i]); + for (i = 0; i < g->grp.npins; i++) { + unsigned int bit = g->grp.pins[i] % NMK_GPIO_PER_CHIP; + slpm[g->grp.pins[i] / NMK_GPIO_PER_CHIP] &= ~BIT(bit); + } nmk_gpio_glitch_slpm_init(slpm); } -- GitLab From d2bd30c710475b2e29288827d2c91f9e6e2b91d7 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 23 Feb 2024 13:23:29 -0800 Subject: [PATCH 1065/2290] drm/mediatek: Fix a null pointer crash in mtk_drm_crtc_finish_page_flip [ Upstream commit c958e86e9cc1b48cac004a6e245154dfba8e163b ] It's possible that mtk_crtc->event is NULL in mtk_drm_crtc_finish_page_flip(). pending_needs_vblank value is set by mtk_crtc->event, but in mtk_drm_crtc_atomic_flush(), it's is not guarded by the same lock in mtk_drm_finish_page_flip(), thus a race condition happens. Consider the following case: CPU1 CPU2 step 1: mtk_drm_crtc_atomic_begin() mtk_crtc->event is not null, step 1: mtk_drm_crtc_atomic_flush: mtk_drm_crtc_update_config( !!mtk_crtc->event) step 2: mtk_crtc_ddp_irq -> mtk_drm_finish_page_flip: lock mtk_crtc->event set to null, pending_needs_vblank set to false unlock pending_needs_vblank set to true, step 2: mtk_crtc_ddp_irq -> mtk_drm_finish_page_flip called again, pending_needs_vblank is still true //null pointer Instead of guarding the entire mtk_drm_crtc_atomic_flush(), it's more efficient to just check if mtk_crtc->event is null before use. Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Hsin-Yi Wang Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20240223212404.3709690-1-hsinyi@chromium.org/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 558000db4a100..beaaf44004cfd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -91,11 +91,13 @@ static void mtk_drm_crtc_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) struct drm_crtc *crtc = &mtk_crtc->base; unsigned long flags; - spin_lock_irqsave(&crtc->dev->event_lock, flags); - drm_crtc_send_vblank_event(crtc, mtk_crtc->event); - drm_crtc_vblank_put(crtc); - mtk_crtc->event = NULL; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + if (mtk_crtc->event) { + spin_lock_irqsave(&crtc->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, mtk_crtc->event); + drm_crtc_vblank_put(crtc); + mtk_crtc->event = NULL; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + } } static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) -- GitLab From a3f22feb2220a945d1c3282e34199e8bcdc5afc4 Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Thu, 8 Dec 2022 21:34:49 +0800 Subject: [PATCH 1066/2290] powerpc/pseries: Fix potential memleak in papr_get_attr() [ Upstream commit cda9c0d556283e2d4adaa9960b2dc19b16156bae ] `buf` is allocated in papr_get_attr(), and krealloc() of `buf` could fail. We need to free the original `buf` in the case of failure. Fixes: 3c14b73454cf ("powerpc/pseries: Interface to represent PAPR firmware attributes") Signed-off-by: Qiheng Lin Signed-off-by: Michael Ellerman Link: https://msgid.link/20221208133449.16284-1-linqiheng@huawei.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/papr_platform_attributes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c index 526c621b098be..eea2041b270b5 100644 --- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -101,10 +101,12 @@ retry: esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); - if (temp_buf) + if (temp_buf) { buf = temp_buf; - else - return -ENOMEM; + } else { + ret = -ENOMEM; + goto out_buf; + } goto retry; } -- GitLab From 48feaf23f3a4a4fe020976fa7aedc4a2205e16e4 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 29 Feb 2024 17:58:47 +0530 Subject: [PATCH 1067/2290] powerpc/hv-gpci: Fix the H_GET_PERF_COUNTER_INFO hcall return value checks [ Upstream commit ad86d7ee43b22aa2ed60fb982ae94b285c1be671 ] Running event hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ in one of the system throws below error: ---Logs--- # perf list | grep hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=?/[Kernel PMU event] # perf stat -v -e hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ sleep 2 Using CPUID 00800200 Control descriptor is not initialized Warning: hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ event is not supported by the kernel. failed to read counter hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ Performance counter stats for 'system wide': hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ 2.000700771 seconds time elapsed The above error is because of the hcall failure as required permission "Enable Performance Information Collection" is not set. Based on current code, single_gpci_request function did not check the error type incase hcall fails and by default returns EINVAL. But we can have other reasons for hcall failures like H_AUTHORITY/H_PARAMETER with detail_rc as GEN_BUF_TOO_SMALL, for which we need to act accordingly. Fix this issue by adding new checks in the single_gpci_request and h_gpci_event_init functions. Result after fix patch changes: # perf stat -e hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ sleep 2 Error: No permission to enable hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=0/ event. Fixes: 220a0c609ad1 ("powerpc/perf: Add support for the hv gpci (get performance counter info) interface") Reported-by: Akanksha J N Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229122847.101162-1-kjain@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/hv-gpci.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7ff8ff3509f5f..943248a0e9a9d 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -164,6 +164,20 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + /* + * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', + * specifies that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * Since in this function we are only accessing data for a given starting index, + * we don't need to accommodate whole data and can get required count by + * accessing first entry data. + * Hence hcall fails only incase the ret value is other than H_SUCCESS or + * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). + */ + if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) + ret = 0; + if (ret) { pr_devel("hcall failed: 0x%lx\n", ret); goto out; @@ -228,6 +242,7 @@ static int h_gpci_event_init(struct perf_event *event) { u64 count; u8 length; + unsigned long ret; /* Not our event */ if (event->attr.type != event->pmu->type) @@ -258,13 +273,23 @@ static int h_gpci_event_init(struct perf_event *event) } /* check if the request works... */ - if (single_gpci_request(event_get_request(event), + ret = single_gpci_request(event_get_request(event), event_get_starting_index(event), event_get_secondary_index(event), event_get_counter_info_version(event), event_get_offset(event), length, - &count)) { + &count); + + /* + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. + */ + if (ret == H_AUTHORITY) + return -EPERM; + + if (ret) { pr_devel("gpci hcall failed\n"); return -EINVAL; } -- GitLab From 0189516bb53699c4ba6814a2da8da818e8780cbb Mon Sep 17 00:00:00 2001 From: Paloma Arellano Date: Thu, 22 Feb 2024 11:39:47 -0800 Subject: [PATCH 1068/2290] drm/msm/dpu: add division of drm_display_mode's hskew parameter [ Upstream commit 551ee0f210991d25f336bc27262353bfe99d3eed ] Setting up the timing engine when the physical encoder has a split role neglects dividing the drm_display_mode's hskew parameter. Let's fix this since this must also be done in preparation for implementing YUV420 over DP. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Paloma Arellano Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/579605/ Link: https://lore.kernel.org/r/20240222194025.25329-3-quic_parellan@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 09aeec00bf5e2..2baade1cd4876 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -257,12 +257,14 @@ static void dpu_encoder_phys_vid_setup_timing_engine( mode.htotal >>= 1; mode.hsync_start >>= 1; mode.hsync_end >>= 1; + mode.hskew >>= 1; DPU_DEBUG_VIDENC(phys_enc, - "split_role %d, halve horizontal %d %d %d %d\n", + "split_role %d, halve horizontal %d %d %d %d %d\n", phys_enc->split_role, mode.hdisplay, mode.htotal, - mode.hsync_start, mode.hsync_end); + mode.hsync_start, mode.hsync_end, + mode.hskew); } drm_mode_to_intf_timing_params(phys_enc, &mode, &timing_params); -- GitLab From c550f0055c5ddcbe9a909becb774c37c54c217be Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Feb 2024 10:35:46 +0800 Subject: [PATCH 1069/2290] modules: wait do_free_init correctly [ Upstream commit 8f8cd6c0a43ed637e620bbe45a8d0e0c2f4d5130 ] The synchronization here is to ensure the ordering of freeing of a module init so that it happens before W+X checking. It is worth noting it is not that the freeing was not happening, it is just that our sanity checkers raced against the permission checkers which assume init memory is already gone. Commit 1a7b7d922081 ("modules: Use vmalloc special flag") moved calling do_free_init() into a global workqueue instead of relying on it being called through call_rcu(..., do_free_init), which used to allowed us call do_free_init() asynchronously after the end of a subsequent grace period. The move to a global workqueue broke the gaurantees for code which needed to be sure the do_free_init() would complete with rcu_barrier(). To fix this callers which used to rely on rcu_barrier() must now instead use flush_work(&init_free_wq). Without this fix, we still could encounter false positive reports in W+X checking since the rcu_barrier() here can not ensure the ordering now. Even worse, the rcu_barrier() can introduce significant delay. Eric Chanudet reported that the rcu_barrier introduces ~0.1s delay on a PREEMPT_RT kernel. [ 0.291444] Freeing unused kernel memory: 5568K [ 0.402442] Run /sbin/init as init process With this fix, the above delay can be eliminated. Link: https://lkml.kernel.org/r/20240227023546.2490667-1-changbin.du@huawei.com Fixes: 1a7b7d922081 ("modules: Use vmalloc special flag") Signed-off-by: Changbin Du Tested-by: Eric Chanudet Acked-by: Luis Chamberlain Cc: Xiaoyi Su Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- include/linux/moduleloader.h | 8 ++++++++ init/main.c | 5 +++-- kernel/module/main.c | 9 +++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1322652a9d0d9..7dc186ec52a29 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -95,6 +95,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/init/main.c b/init/main.c index 87a52bdb41d67..ccde19e7275fa 100644 --- a/init/main.c +++ b/init/main.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -1473,11 +1474,11 @@ static void mark_readonly(void) if (rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier(); + flush_module_init_free_work(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/module/main.c b/kernel/module/main.c index 7a376e26de85b..554aba47ab689 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2434,6 +2434,11 @@ static void do_free_init(struct work_struct *w) } } +void flush_module_init_free_work(void) +{ + flush_work(&init_free_wq); +} + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." /* Default value for module->async_probe_requested */ @@ -2524,8 +2529,8 @@ static noinline int do_init_module(struct module *mod) * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to - * be cleaned up needs to sync with the queued work - ie - * rcu_barrier() + * be cleaned up needs to sync with the queued work by invoking + * flush_module_init_free_work(). */ if (llist_add(&freeinit->node, &init_free_list)) schedule_work(&init_free_wq); -- GitLab From c2c32faae82fd08b27b2f8b782acefe949638774 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Mar 2024 23:34:08 +1100 Subject: [PATCH 1070/2290] powerpc/embedded6xx: Fix no previous prototype for avr_uart_send() etc. [ Upstream commit 20933531be0577cdd782216858c26150dbc7936f ] Move the prototypes into mpc10x.h which is included by all the relevant C files, fixes: arch/powerpc/platforms/embedded6xx/ls_uart.c:59:6: error: no previous prototype for 'avr_uart_configure' arch/powerpc/platforms/embedded6xx/ls_uart.c:82:6: error: no previous prototype for 'avr_uart_send' Signed-off-by: Michael Ellerman Link: https://msgid.link/20240305123410.3306253-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/platforms/embedded6xx/linkstation.c | 3 --- arch/powerpc/platforms/embedded6xx/mpc10x.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 1830e1ac1f8f0..107a8b60ad0c9 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -99,9 +99,6 @@ static void __init linkstation_init_IRQ(void) mpic_init(mpic); } -extern void avr_uart_configure(void); -extern void avr_uart_send(const char); - static void __noreturn linkstation_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h index 5ad12023e5628..ebc258fa4858d 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc10x.h +++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h @@ -156,4 +156,7 @@ int mpc10x_disable_store_gathering(struct pci_controller *hose); /* For MPC107 boards that use the built-in openpic */ void mpc10x_set_openpic(void); +void avr_uart_configure(void); +void avr_uart_send(const char c); + #endif /* __PPC_KERNEL_MPC10X_H */ -- GitLab From d1f384e4c2011a90c2025fabe95476c6fdb6888d Mon Sep 17 00:00:00 2001 From: George Stark Date: Thu, 14 Dec 2023 20:36:05 +0300 Subject: [PATCH 1071/2290] leds: aw2013: Unlock mutex before destroying it [ Upstream commit 6969d0a2ba1adc9ba6a49b9805f24080896c255c ] In the probe() callback in case of error mutex is destroyed being locked which is not allowed so unlock the mutex before destroying. Fixes: 59ea3c9faf32 ("leds: add aw2013 driver") Signed-off-by: George Stark Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231214173614.2820929-2-gnstark@salutedevices.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/leds-aw2013.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/leds/leds-aw2013.c b/drivers/leds/leds-aw2013.c index 0b52fc9097c6e..3c05958578a1c 100644 --- a/drivers/leds/leds-aw2013.c +++ b/drivers/leds/leds-aw2013.c @@ -397,6 +397,7 @@ error_reg: regulator_disable(chip->vcc_regulator); error: + mutex_unlock(&chip->mutex); mutex_destroy(&chip->mutex); return ret; } -- GitLab From dd7f2d0f6187da4655820d4daf28fe935c79c423 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 17 Feb 2024 20:11:30 +0100 Subject: [PATCH 1072/2290] leds: sgm3140: Add missing timer cleanup and flash gpio control [ Upstream commit 205c29887a333ee4b37596e6533373e38cb23947 ] Enabling strobe and then setting brightness to 0 causes the driver to enter invalid state after strobe end timer fires. We should cancel strobe mode resources when changing brightness (aka torch mode). Fixes: cef8ec8cbd21 ("leds: add sgm3140 driver") Signed-off-by: Ondrej Jirman Link: https://lore.kernel.org/r/20240217191133.1757553-1-megi@xff.cz Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/flash/leds-sgm3140.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/leds/flash/leds-sgm3140.c b/drivers/leds/flash/leds-sgm3140.c index d3a30ad94ac46..dd5d327c52a10 100644 --- a/drivers/leds/flash/leds-sgm3140.c +++ b/drivers/leds/flash/leds-sgm3140.c @@ -114,8 +114,11 @@ static int sgm3140_brightness_set(struct led_classdev *led_cdev, "failed to enable regulator: %d\n", ret); return ret; } + gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 1); } else { + del_timer_sync(&priv->powerdown_timer); + gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 0); ret = regulator_disable(priv->vin_regulator); if (ret) { -- GitLab From 491ec4f4544373277b4e7ca35525d0d167d6c8a2 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 20 Feb 2024 00:11:19 +0100 Subject: [PATCH 1073/2290] backlight: lm3630a: Initialize backlight_properties on init [ Upstream commit ad9aeb0e3aa90ebdad5fabf9c21783740eb95907 ] The backlight_properties struct should be initialized to zero before using, otherwise there will be some random values in the struct. Fixes: 0c2a665a648e ("backlight: add Backlight driver for lm3630 chip") Signed-off-by: Luca Weiss Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220-lm3630a-fixups-v1-1-9ca62f7e4a33@z3ntu.xyz Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3630a_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 475f35635bf67..855c275754578 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -339,6 +339,7 @@ static int lm3630a_backlight_register(struct lm3630a_chip *pchip) struct backlight_properties props; const char *label; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; if (pdata->leda_ctrl != LM3630A_LEDA_DISABLE) { props.brightness = pdata->leda_init_brt; -- GitLab From d6e321dcd7d42c492b305db50376bfa070b9b0cd Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 20 Feb 2024 00:11:20 +0100 Subject: [PATCH 1074/2290] backlight: lm3630a: Don't set bl->props.brightness in get_brightness [ Upstream commit 4bf7ddd2d2f0f8826f25f74c7eba4e2c323a1446 ] There's no need to set bl->props.brightness, the get_brightness function is just supposed to return the current brightness and not touch the struct. With that done we can also remove the 'goto out' and just return the value. Fixes: 0c2a665a648e ("backlight: add Backlight driver for lm3630 chip") Signed-off-by: Luca Weiss Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220-lm3630a-fixups-v1-2-9ca62f7e4a33@z3ntu.xyz Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3630a_bl.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 855c275754578..0d43f6326750f 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -231,7 +231,7 @@ static int lm3630a_bank_a_get_brightness(struct backlight_device *bl) if (rval < 0) goto out_i2c_err; brightness |= rval; - goto out; + return brightness; } /* disable sleep */ @@ -242,11 +242,8 @@ static int lm3630a_bank_a_get_brightness(struct backlight_device *bl) rval = lm3630a_read(pchip, REG_BRT_A); if (rval < 0) goto out_i2c_err; - brightness = rval; + return rval; -out: - bl->props.brightness = brightness; - return bl->props.brightness; out_i2c_err: dev_err(pchip->dev, "i2c failed to access register\n"); return 0; @@ -306,7 +303,7 @@ static int lm3630a_bank_b_get_brightness(struct backlight_device *bl) if (rval < 0) goto out_i2c_err; brightness |= rval; - goto out; + return brightness; } /* disable sleep */ @@ -317,11 +314,8 @@ static int lm3630a_bank_b_get_brightness(struct backlight_device *bl) rval = lm3630a_read(pchip, REG_BRT_B); if (rval < 0) goto out_i2c_err; - brightness = rval; + return rval; -out: - bl->props.brightness = brightness; - return bl->props.brightness; out_i2c_err: dev_err(pchip->dev, "i2c failed to access register\n"); return 0; -- GitLab From 6d7e897358fc1f15d25490376d9d4a3a57bc9782 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 20 Feb 2024 15:35:24 +0000 Subject: [PATCH 1075/2290] backlight: da9052: Fully initialize backlight_properties during probe [ Upstream commit 0285e9efaee8276305db5c52a59baf84e9731556 ] props is stack allocated and the fields that are not explcitly set by the probe function need to be zeroed or we'll get undefined behaviour (especially so power/blank states)! Fixes: 6ede3d832aaa ("backlight: add driver for DA9052/53 PMIC v1") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220153532.76613-2-daniel.thompson@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/da9052_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/da9052_bl.c b/drivers/video/backlight/da9052_bl.c index 882359dd288c0..aa00379392a0f 100644 --- a/drivers/video/backlight/da9052_bl.c +++ b/drivers/video/backlight/da9052_bl.c @@ -117,6 +117,7 @@ static int da9052_backlight_probe(struct platform_device *pdev) wleds->led_reg = platform_get_device_id(pdev)->driver_data; wleds->state = DA9052_WLEDS_OFF; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = DA9052_MAX_BRIGHTNESS; -- GitLab From 693c1edfcf218814c6880e95b37496832446beb7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 20 Feb 2024 15:35:25 +0000 Subject: [PATCH 1076/2290] backlight: lm3639: Fully initialize backlight_properties during probe [ Upstream commit abb5a5d951fbea3feb5c4ba179b89bb96a1d3462 ] props is stack allocated and the fields that are not explcitly set by the probe function need to be zeroed or we'll get undefined behaviour (especially so power/blank states)! Fixes: 0f59858d5119 ("backlight: add new lm3639 backlight driver") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220153532.76613-3-daniel.thompson@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3639_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/lm3639_bl.c b/drivers/video/backlight/lm3639_bl.c index 6580911671a3e..4c9726a7fa720 100644 --- a/drivers/video/backlight/lm3639_bl.c +++ b/drivers/video/backlight/lm3639_bl.c @@ -339,6 +339,7 @@ static int lm3639_probe(struct i2c_client *client, } /* backlight */ + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.brightness = pdata->init_brt_led; props.max_brightness = pdata->max_brt_led; -- GitLab From 765f673c0eb9115d08689c89a1cc1acf1c2992c7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 20 Feb 2024 15:35:26 +0000 Subject: [PATCH 1077/2290] backlight: lp8788: Fully initialize backlight_properties during probe [ Upstream commit 392346827fbe8a7fd573dfb145170d7949f639a6 ] props is stack allocated and the fields that are not explcitly set by the probe function need to be zeroed or we'll get undefined behaviour (especially so power/blank states)! Fixes: c5a51053cf3b ("backlight: add new lp8788 backlight driver") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20240220153532.76613-4-daniel.thompson@linaro.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lp8788_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c index ba42f3fe0c739..d9b95dbd40d30 100644 --- a/drivers/video/backlight/lp8788_bl.c +++ b/drivers/video/backlight/lp8788_bl.c @@ -191,6 +191,7 @@ static int lp8788_backlight_register(struct lp8788_bl *bl) int init_brt; char *name; + memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_PLATFORM; props.max_brightness = MAX_BRIGHTNESS; -- GitLab From faa2ba4cebdfc58f583cad52348d0a491d6df6f8 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 24 Feb 2024 18:42:28 +0100 Subject: [PATCH 1078/2290] sparc32: Fix section mismatch in leon_pci_grpci [ Upstream commit 24338a6ae13cb743ced77da1b3a12c83f08a0c96 ] Passing a datastructre marked _initconst to platform_driver_register() is wrong. Drop the __initconst notation. This fixes the following warnings: WARNING: modpost: vmlinux: section mismatch in reference: grpci1_of_driver+0x30 (section: .data) -> grpci1_of_match (section: .init.rodata) WARNING: modpost: vmlinux: section mismatch in reference: grpci2_of_driver+0x30 (section: .data) -> grpci2_of_match (section: .init.rodata) Signed-off-by: Sam Ravnborg Cc: "David S. Miller" Cc: Andreas Larsson Fixes: 4154bb821f0b ("sparc: leon: grpci1: constify of_device_id") Fixes: 03949b1cb9f1 ("sparc: leon: grpci2: constify of_device_id") Tested-by: Randy Dunlap # build-tested Reviewed-by: Andreas Larsson Tested-by: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240224-sam-fix-sparc32-all-builds-v2-7-1f186603c5c4@ravnborg.org Signed-off-by: Sasha Levin --- arch/sparc/kernel/leon_pci_grpci1.c | 2 +- arch/sparc/kernel/leon_pci_grpci2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index e6935d0ac1ec9..c32590bdd3120 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -696,7 +696,7 @@ err1: return err; } -static const struct of_device_id grpci1_of_match[] __initconst = { +static const struct of_device_id grpci1_of_match[] = { { .name = "GAISLER_PCIFBRG", }, diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index ca22f93d90454..dd06abc61657f 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -887,7 +887,7 @@ err1: return err; } -static const struct of_device_id grpci2_of_match[] __initconst = { +static const struct of_device_id grpci2_of_match[] = { { .name = "GAISLER_GRPCI2", }, -- GitLab From a8b2b26fdd011ebe36d68a9a321ca45801685959 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Sat, 2 Mar 2024 00:52:14 +0000 Subject: [PATCH 1079/2290] clk: Fix clk_core_get NULL dereference [ Upstream commit e97fe4901e0f59a0bfd524578fe3768f8ca42428 ] It is possible for clk_core_get to dereference a NULL in the following sequence: clk_core_get() of_clk_get_hw_from_clkspec() __of_clk_get_hw_from_provider() __clk_get_hw() __clk_get_hw() can return NULL which is dereferenced by clk_core_get() at hw->core. Prior to commit dde4eff47c82 ("clk: Look for parents with clkdev based clk_lookups") the check IS_ERR_OR_NULL() was performed which would have caught the NULL. Reading the description of this function it talks about returning NULL but that cannot be so at the moment. Update the function to check for hw before dereferencing it and return NULL if hw is NULL. Fixes: dde4eff47c82 ("clk: Look for parents with clkdev based clk_lookups") Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20240302-linux-next-24-03-01-simple-clock-fixes-v1-1-25f348a5982b@linaro.org Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 33fedbd096f33..9004e07182259 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -407,6 +407,9 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) if (IS_ERR(hw)) return ERR_CAST(hw); + if (!hw) + return NULL; + return hw->core; } -- GitLab From 8c4889a9ea861d7be37463c10846eb75e1b49c9d Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 1 Mar 2024 16:44:37 +0800 Subject: [PATCH 1080/2290] clk: zynq: Prevent null pointer dereference caused by kmalloc failure [ Upstream commit 7938e9ce39d6779d2f85d822cc930f73420e54a6 ] The kmalloc() in zynq_clk_setup() will return null if the physical memory has run out. As a result, if we use snprintf() to write data to the null address, the null pointer dereference bug will happen. This patch uses a stack variable to replace the kmalloc(). Fixes: 0ee52b157b8e ("clk: zynq: Add clock controller driver") Suggested-by: Michal Simek Suggested-by: Stephen Boyd Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20240301084437.16084-1-duoming@zju.edu.cn Acked-by: Michal Simek Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/zynq/clkc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c index 7bdeaff2bfd68..c28d3dacf0fb2 100644 --- a/drivers/clk/zynq/clkc.c +++ b/drivers/clk/zynq/clkc.c @@ -42,6 +42,7 @@ static void __iomem *zynq_clkc_base; #define SLCR_SWDT_CLK_SEL (zynq_clkc_base + 0x204) #define NUM_MIO_PINS 54 +#define CLK_NAME_LEN 16 #define DBG_CLK_CTRL_CLKACT_TRC BIT(0) #define DBG_CLK_CTRL_CPU_1XCLKACT BIT(1) @@ -215,7 +216,7 @@ static void __init zynq_clk_setup(struct device_node *np) int i; u32 tmp; int ret; - char *clk_name; + char clk_name[CLK_NAME_LEN]; unsigned int fclk_enable = 0; const char *clk_output_name[clk_max]; const char *cpu_parents[4]; @@ -426,12 +427,10 @@ static void __init zynq_clk_setup(struct device_node *np) "gem1_emio_mux", CLK_SET_RATE_PARENT, SLCR_GEM1_CLK_CTRL, 0, 0, &gem1clk_lock); - tmp = strlen("mio_clk_00x"); - clk_name = kmalloc(tmp, GFP_KERNEL); for (i = 0; i < NUM_MIO_PINS; i++) { int idx; - snprintf(clk_name, tmp, "mio_clk_%2.2d", i); + snprintf(clk_name, CLK_NAME_LEN, "mio_clk_%2.2d", i); idx = of_property_match_string(np, "clock-names", clk_name); if (idx >= 0) can_mio_mux_parents[i] = of_clk_get_parent_name(np, @@ -439,7 +438,6 @@ static void __init zynq_clk_setup(struct device_node *np) else can_mio_mux_parents[i] = dummy_nm; } - kfree(clk_name); clk_register_mux(NULL, "can_mux", periph_parents, 4, CLK_SET_RATE_NO_REPARENT, SLCR_CAN_CLK_CTRL, 4, 2, 0, &canclk_lock); -- GitLab From fd58b4e3c0c68ce51bdcf85adbe0b21e768b7088 Mon Sep 17 00:00:00 2001 From: Athaariq Ardhiansyah Date: Sun, 10 Mar 2024 20:58:44 +0700 Subject: [PATCH 1081/2290] ALSA: hda/realtek: fix ALC285 issues on HP Envy x360 laptops [ Upstream commit c062166995c9e57d5cd508b332898f79da319802 ] Realtek codec on HP Envy laptop series are heavily modified by vendor. Therefore, need intervention to make it work properly. The patch fixes: - B&O soundbar speakers (between lid and keyboard) activation - Enable LED on mute button - Add missing process coefficient which affects the output amplifier - Volume control synchronization between B&O soundbar and side speakers - Unmute headset output on several HP Envy models - Auto-enable headset mic when plugged This patch was tested on HP Envy x360 13-AR0107AU with Realtek ALC285 The only unsolved problem is output amplifier of all built-in speakers is too weak, which causes volume of built-in speakers cannot be loud as vendor's proprietary driver due to missing _DSD parameter in the firmware. The solution is currently on research. Expected to has another patch in the future. Potential fix to related issues, need test before close those issues: - https://bugzilla.kernel.org/show_bug.cgi?id=189331 - https://bugzilla.kernel.org/show_bug.cgi?id=216632 - https://bugzilla.kernel.org/show_bug.cgi?id=216311 - https://bugzilla.kernel.org/show_bug.cgi?id=213507 Signed-off-by: Athaariq Ardhiansyah Message-ID: <20240310140249.3695-1-foss@athaariq.my.id> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ede3f8b273d79..6e759032eba2e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6693,6 +6693,60 @@ static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, } } +static void alc285_fixup_hp_envy_x360(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + static const struct coef_fw coefs[] = { + WRITE_COEF(0x08, 0x6a0c), WRITE_COEF(0x0d, 0xa023), + WRITE_COEF(0x10, 0x0320), WRITE_COEF(0x1a, 0x8c03), + WRITE_COEF(0x25, 0x1800), WRITE_COEF(0x26, 0x003a), + WRITE_COEF(0x28, 0x1dfe), WRITE_COEF(0x29, 0xb014), + WRITE_COEF(0x2b, 0x1dfe), WRITE_COEF(0x37, 0xfe15), + WRITE_COEF(0x38, 0x7909), WRITE_COEF(0x45, 0xd489), + WRITE_COEF(0x46, 0x00f4), WRITE_COEF(0x4a, 0x21e0), + WRITE_COEF(0x66, 0x03f0), WRITE_COEF(0x67, 0x1000), + WRITE_COEF(0x6e, 0x1005), { } + }; + + static const struct hda_pintbl pincfgs[] = { + { 0x12, 0xb7a60130 }, /* Internal microphone*/ + { 0x14, 0x90170150 }, /* B&O soundbar speakers */ + { 0x17, 0x90170153 }, /* Side speakers */ + { 0x19, 0x03a11040 }, /* Headset microphone */ + { } + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_apply_pincfgs(codec, pincfgs); + + /* Fixes volume control problem for side speakers */ + alc295_fixup_disable_dac3(codec, fix, action); + + /* Fixes no sound from headset speaker */ + snd_hda_codec_amp_stereo(codec, 0x21, HDA_OUTPUT, 0, -1, 0); + + /* Auto-enable headset mic when plugged */ + snd_hda_jack_set_gating_jack(codec, 0x19, 0x21); + + /* Headset mic volume enhancement */ + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREF50); + break; + case HDA_FIXUP_ACT_INIT: + alc_process_coef_fw(codec, coefs); + break; + case HDA_FIXUP_ACT_BUILD: + rename_ctl(codec, "Bass Speaker Playback Volume", + "B&O-Tuned Playback Volume"); + rename_ctl(codec, "Front Playback Switch", + "B&O Soundbar Playback Switch"); + rename_ctl(codec, "Bass Speaker Playback Switch", + "Side Speaker Playback Switch"); + break; + } +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -7131,6 +7185,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, @@ -9054,6 +9109,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_ENVY_X360] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_envy_x360, + .chained = true, + .chain_id = ALC285_FIXUP_HP_GPIO_AMP_INIT, + }, [ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_ideapad_s740_coef, @@ -9595,6 +9656,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), @@ -10249,6 +10311,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, -- GitLab From 629af0d5fe94a35f498ba2c3f19bd78bfa591be6 Mon Sep 17 00:00:00 2001 From: Johan Carlsson Date: Wed, 13 Mar 2024 09:15:09 +0100 Subject: [PATCH 1082/2290] ALSA: usb-audio: Stop parsing channels bits when all channels are found. [ Upstream commit a39d51ff1f52cd0b6fe7d379ac93bd8b4237d1b7 ] If a usb audio device sets more bits than the amount of channels it could write outside of the map array. Signed-off-by: Johan Carlsson Fixes: 04324ccc75f9 ("ALSA: usb-audio: add channel map support") Message-ID: <20240313081509.9801-1-johan.carlsson@teenage.engineering> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/stream.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 3d4add94e367d..d5409f3879455 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -300,9 +300,12 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, c = 0; if (bits) { - for (; bits && *maps; maps++, bits >>= 1) + for (; bits && *maps; maps++, bits >>= 1) { if (bits & 1) chmap->map[c++] = *maps; + if (c == chmap->channels) + break; + } } else { /* If we're missing wChannelConfig, then guess something to make sure the channel map is not skipped entirely */ -- GitLab From 0606bedcc373696c4c7d637dc30ed50725d184f5 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 25 Jul 2023 10:55:24 -0500 Subject: [PATCH 1083/2290] RDMA/irdma: Allow accurate reporting on QP max send/recv WR [ Upstream commit 3a8498720450174b8db450d3375a04dca81b3534 ] Currently the attribute cap.max_send_wr and cap.max_recv_wr sent from user-space during create QP are the provider computed SQ/RQ depth as opposed to raw values passed from application. This inhibits computation of an accurate value for max_send_wr and max_recv_wr for this QP in the kernel which matches the value returned in user create QP. Also these capabilities needs to be reported from the driver in query QP. Add support by extending the ABI to allow the raw cap.max_send_wr and cap.max_recv_wr to be passed from user-space, while keeping compatibility for the older scheme. The internal HW depth and shift needed for the WQs needs to be computed now for both kernel and user-mode QPs. Add new helpers to assist with this: irdma_uk_calc_depth_shift_sq, irdma_uk_calc_depth_shift_rq and irdma_uk_calc_depth_shift_wq. Consolidate all the user mode QP setup into a new function irdma_setup_umode_qp which keeps it with its counterpart irdma_setup_kmode_qp. Signed-off-by: Youvaraj Sagar Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155525.1081-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky Stable-dep-of: 926e8ea4b8da ("RDMA/irdma: Remove duplicate assignment") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/uk.c | 89 +++++++++++--- drivers/infiniband/hw/irdma/user.h | 10 ++ drivers/infiniband/hw/irdma/verbs.c | 182 +++++++++++++++++----------- drivers/infiniband/hw/irdma/verbs.h | 3 +- include/uapi/rdma/irdma-abi.h | 6 + 5 files changed, 203 insertions(+), 87 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 280d633d4ec4f..d691cdef5e9a3 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1414,6 +1414,78 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } +/** + * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ + * @ukinfo: qp initialization info + * @sq_shift: Returns shift of SQ + * @rq_shift: Returns shift of RQ + */ +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } +} + +/** + * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. + * @ukinfo: qp initialization info + * @sq_depth: Returns depth of SQ + * @sq_shift: Returns shift of SQ + */ +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, + *sq_shift, sq_depth); + + return status; +} + +/** + * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. + * @ukinfo: qp initialization info + * @rq_depth: Returns depth of RQ + * @rq_shift: Returns shift of RQ + */ +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift) +{ + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } + + status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, + *rq_shift, rq_depth); + + return status; +} + /** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) @@ -1428,23 +1500,12 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; - u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return -EINVAL; - irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); - if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt, - info->max_inline_data, &sqshift); - if (info->abi_ver > 4) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - } else { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt + 1, - info->max_inline_data, &sqshift); - } qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; @@ -1458,7 +1519,7 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->sq_size = info->sq_size; qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; - sq_ring_size = qp->sq_size << sqshift; + sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { @@ -1473,9 +1534,9 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; - qp->rq_wqe_size = rqshift; + qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); - qp->rq_wqe_size_multiplier = 1 << rqshift; + qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index d0cdf609f5e06..1e0e1a71dbada 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -295,6 +295,12 @@ void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift); +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift); +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -374,8 +380,12 @@ struct irdma_qp_uk_init_info { u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; + u32 sq_depth; + u32 rq_depth; u8 first_sq_wq; u8 type; + u8 sq_shift; + u8 rq_shift; int abi_ver; bool legacy_mode; }; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 42c671f209233..bb423849968d9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -277,7 +277,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); - struct irdma_uk_attrs *uk_attrs; + struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) @@ -292,7 +292,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; - uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; + if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) + ucontext->use_raw_attrs = true; + /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) @@ -327,6 +329,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); @@ -566,6 +569,86 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, } } +/** + * irdma_setup_umode_qp - setup sq and rq size in user mode qp + * @iwdev: iwarp device + * @iwqp: qp ptr (user or kernel) + * @info: initialize info to return + * @init_attr: Initial QP create attributes + */ +static int irdma_setup_umode_qp(struct ib_udata *udata, + struct irdma_device *iwdev, + struct irdma_qp *iwqp, + struct irdma_qp_init_info *info, + struct ib_qp_init_attr *init_attr) +{ + struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct irdma_ucontext, ibucontext); + struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; + struct irdma_create_qp_req req; + unsigned long flags; + int ret; + + ret = ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen)); + if (ret) { + ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n"); + return ret; + } + + iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; + iwqp->user_mode = 1; + if (req.user_wqe_bufs) { + info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; + spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); + iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); + + if (!iwqp->iwpbl) { + ret = -ENODATA; + ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); + return ret; + } + } + + if (!ucontext->use_raw_attrs) { + /** + * Maintain backward compat with older ABI which passes sq and + * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. + * There is no way to compute the correct value of + * iwqp->max_send_wr/max_recv_wr in the kernel. + */ + iwqp->max_send_wr = init_attr->cap.max_send_wr; + iwqp->max_recv_wr = init_attr->cap.max_recv_wr; + ukinfo->sq_size = init_attr->cap.max_send_wr; + ukinfo->rq_size = init_attr->cap.max_recv_wr; + irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, + &ukinfo->rq_shift); + } else { + ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); + if (ret) + return ret; + + ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); + if (ret) + return ret; + + iwqp->max_send_wr = + (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = + (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; + } + + irdma_setup_virt_qp(iwdev, iwqp, info); + + return 0; +} + /** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device @@ -579,40 +662,28 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; - u32 sqdepth, rqdepth; - u8 sqshift, rqshift; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; - irdma_get_wqe_shift(uk_attrs, - uk_attrs->hw_rev >= IRDMA_GEN_2 ? ukinfo->max_sq_frag_cnt + 1 : - ukinfo->max_sq_frag_cnt, - ukinfo->max_inline_data, &sqshift); - status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, - &sqdepth); + status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); if (status) return status; - if (uk_attrs->hw_rev == IRDMA_GEN_1) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - else - irdma_get_wqe_shift(uk_attrs, ukinfo->max_rq_frag_cnt, 0, - &rqshift); - - status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, - &rqdepth); + status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = - kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = - kcalloc(rqdepth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; @@ -622,7 +693,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; - size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE; + size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = ALIGN(size, 256); @@ -638,16 +709,19 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq = mem->va; info->sq_pa = mem->pa; - ukinfo->rq = &ukinfo->sq[sqdepth]; - info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->shadow_area = ukinfo->rq[rqdepth].elem; - info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->sq_size = sqdepth >> sqshift; - ukinfo->rq_size = rqdepth >> rqshift; + ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; + info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; + info->shadow_area_pa = + info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; ukinfo->qp_id = iwqp->ibqp.qp_num; - init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift; - init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift; + iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + init_attr->cap.max_send_wr = iwqp->max_send_wr; + init_attr->cap.max_recv_wr = iwqp->max_recv_wr; return 0; } @@ -805,18 +879,14 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; int err_code; - int sq_size; - int rq_size; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {}; struct irdma_qp_host_ctx_info *ctx_info; - unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) @@ -826,13 +896,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return -EINVAL; - sq_size = init_attr->cap.max_send_wr; - rq_size = init_attr->cap.max_recv_wr; - init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; - init_info.qp_uk_init_info.sq_size = sq_size; - init_info.qp_uk_init_info.rq_size = rq_size; + init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; + init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; @@ -882,36 +949,9 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_waitqueue_head(&iwqp->mod_qp_waitq); if (udata) { - err_code = ib_copy_from_udata(&req, udata, - min(sizeof(req), udata->inlen)); - if (err_code) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: ib_copy_from_data fail\n"); - goto error; - } - - iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - iwqp->user_mode = 1; - if (req.user_wqe_bufs) { - struct irdma_ucontext *ucontext = - rdma_udata_to_drv_context(udata, - struct irdma_ucontext, - ibucontext); - - init_info.qp_uk_init_info.legacy_mode = ucontext->legacy_mode; - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - - if (!iwqp->iwpbl) { - err_code = -ENODATA; - ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); - goto error; - } - } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - irdma_setup_virt_qp(iwdev, iwqp, &init_info); + err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, + init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; @@ -966,8 +1006,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; rf->qp_table[qp_num] = iwqp; - iwqp->max_send_wr = sq_size; - iwqp->max_recv_wr = rq_size; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { @@ -988,8 +1026,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } - uresp.actual_sq_size = sq_size; - uresp.actual_rq_size = rq_size; + uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; + uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 9f9e273bbff3e..0bc0d0faa0868 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -18,7 +18,8 @@ struct irdma_ucontext { struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ int abi_ver; - bool legacy_mode; + u8 legacy_mode : 1; + u8 use_raw_attrs : 1; }; struct irdma_pd { diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index a7085e092d348..3a0cde4dcf331 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -22,10 +22,15 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_CQ = 2, }; +enum { + IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, +}; + struct irdma_alloc_ucontext_req { __u32 rsvd32; __u8 userspace_ver; __u8 rsvd8[3]; + __aligned_u64 comp_mask; }; struct irdma_alloc_ucontext_resp { @@ -46,6 +51,7 @@ struct irdma_alloc_ucontext_resp { __u16 max_hw_sq_chunk; __u8 hw_rev; __u8 rsvd2; + __aligned_u64 comp_mask; }; struct irdma_alloc_pd_resp { -- GitLab From ae771b805dfd6f09c0c43a5e85056ce8cfc77d42 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:39:53 -0600 Subject: [PATCH 1084/2290] RDMA/irdma: Remove duplicate assignment [ Upstream commit 926e8ea4b8dac84f6d14a4b60d0653f1f2ba9431 ] Remove the unneeded assignment of the qp_num which is already set in irdma_create_qp(). Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233953.400483-1-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index bb423849968d9..76c5f461faca0 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -716,7 +716,6 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; - ukinfo->qp_id = iwqp->ibqp.qp_num; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; @@ -941,7 +940,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; init_info.pd = &iwpd->sc_pd; - init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; + init_info.qp_uk_init_info.qp_id = qp_num; if (!rdma_protocol_roce(&iwdev->ibdev, 1)) init_info.qp_uk_init_info.first_sq_wq = 1; iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; -- GitLab From e362d007294955a4fb929e1c8978154a64efdcb6 Mon Sep 17 00:00:00 2001 From: William Kucharski Date: Fri, 2 Feb 2024 02:15:49 -0700 Subject: [PATCH 1085/2290] RDMA/srpt: Do not register event handler until srpt device is fully setup [ Upstream commit c21a8870c98611e8f892511825c9607f1e2cd456 ] Upon rare occasions, KASAN reports a use-after-free Write in srpt_refresh_port(). This seems to be because an event handler is registered before the srpt device is fully setup and a race condition upon error may leave a partially setup event handler in place. Instead, only register the event handler after srpt device initialization is complete. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: William Kucharski Link: https://lore.kernel.org/r/20240202091549.991784-2-william.kucharski@oracle.com Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index cffa93f114a73..fd6c260d5857d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3209,7 +3209,6 @@ static int srpt_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); - ib_register_event_handler(&sdev->event_handler); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; @@ -3232,6 +3231,7 @@ static int srpt_add_one(struct ib_device *device) } } + ib_register_event_handler(&sdev->event_handler); spin_lock(&srpt_dev_lock); list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); @@ -3242,7 +3242,6 @@ static int srpt_add_one(struct ib_device *device) err_port: srpt_unregister_mad_agent(sdev, i); - ib_unregister_event_handler(&sdev->event_handler); err_cm: if (sdev->cm_id) ib_destroy_cm_id(sdev->cm_id); -- GitLab From 492acea36b29499ae0348e033073ec21d6cb2746 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Feb 2023 15:04:56 +0800 Subject: [PATCH 1086/2290] f2fs: reduce stack memory cost by using bitfield in struct f2fs_io_info [ Upstream commit 2eae077e6e46f9046d383631145750e043820dce ] This patch tries to use bitfield in struct f2fs_io_info to improve memory usage. struct f2fs_io_info { ... unsigned int need_lock:8; /* indicate we need to lock cp_rwsem */ unsigned int version:8; /* version of the node */ unsigned int submitted:1; /* indicate IO submission */ unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ unsigned int retry:1; /* need to reallocate block address */ unsigned int encrypted:1; /* indicate file is encrypted */ unsigned int post_read:1; /* require post read */ ... }; After this patch, size of struct f2fs_io_info reduces from 136 to 120. [Nathan: fix a compile warning (single-bit-bitfield-constant-conversion)] Signed-off-by: Nathan Chancellor Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 8a430dd49e9c ("f2fs: compress: fix to guarantee persisting compressed blocks by CP") Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 6 +++--- fs/f2fs/compress.c | 5 +++-- fs/f2fs/data.c | 10 +++++----- fs/f2fs/f2fs.h | 18 +++++++++--------- fs/f2fs/gc.c | 8 ++++---- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 6 +++--- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index eb4d69f53337f..3ec203bbd5593 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -70,7 +70,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_por = !is_meta, + .is_por = !is_meta ? 1 : 0, }; int err; @@ -234,8 +234,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op = REQ_OP_READ, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, - .in_list = false, - .is_por = (type == META_POR), + .in_list = 0, + .is_por = (type == META_POR) ? 1 : 0, }; struct blk_plug plug; int err; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 967262c37da52..7d85db7208e14 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1249,10 +1249,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, - .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode), + .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? + 1 : 0, }; struct dnode_of_data dn; struct node_info ni; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8b561af379743..7b3eb192f9c03 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -982,7 +982,7 @@ next: bio_page = fio->page; /* set submitted = true as a return value */ - fio->submitted = true; + fio->submitted = 1; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -998,7 +998,7 @@ alloc_new: (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - fio->retry = true; + fio->retry = 1; goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_VECS); @@ -2776,10 +2776,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, - .submitted = false, + .submitted = 0, .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, - .post_read = f2fs_post_read_required(inode), + .post_read = f2fs_post_read_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, @@ -2902,7 +2902,7 @@ out: } if (submitted) - *submitted = fio.submitted ? 1 : 0; + *submitted = fio.submitted; return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e5a9498b89c06..1d78bca5037f4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1183,19 +1183,19 @@ struct f2fs_io_info { struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ - bool submitted; /* indicate IO submission */ - int need_lock; /* indicate we need to lock cp_rwsem */ - bool in_list; /* indicate fio is in io_list */ - bool is_por; /* indicate IO is from recovery or not */ - bool retry; /* need to reallocate block address */ - int compr_blocks; /* # of compressed block addresses */ - bool encrypted; /* indicate file is encrypted */ - bool post_read; /* require post read */ + unsigned int compr_blocks; /* # of compressed block addresses */ + unsigned int need_lock:8; /* indicate we need to lock cp_rwsem */ + unsigned int version:8; /* version of the node */ + unsigned int submitted:1; /* indicate IO submission */ + unsigned int in_list:1; /* indicate fio is in io_list */ + unsigned int is_por:1; /* indicate IO is from recovery or not */ + unsigned int retry:1; /* need to reallocate block address */ + unsigned int encrypted:1; /* indicate file is encrypted */ + unsigned int post_read:1; /* require post read */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ sector_t *last_block; /* last block number in bio */ - unsigned char version; /* version of the node */ }; struct bio_entry { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ec7212f7a9b73..8161355658562 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1187,8 +1187,8 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, - .in_list = false, - .retry = false, + .in_list = 0, + .retry = 0, }; int err; @@ -1276,8 +1276,8 @@ static int move_data_block(struct inode *inode, block_t bidx, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, - .in_list = false, - .retry = false, + .in_list = 0, + .retry = 0, }; struct dnode_of_data dn; struct f2fs_summary sum; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c6d0e07096326..5db6740d31364 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1587,7 +1587,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 16bf9d5c8d4f9..a3dabec1f216a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3312,10 +3312,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_bio_info *io; if (F2FS_IO_ALIGNED(sbi)) - fio->retry = false; + fio->retry = 0; INIT_LIST_HEAD(&fio->list); - fio->in_list = true; + fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -3396,7 +3396,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, - .in_list = false, + .in_list = 0, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) -- GitLab From e54cce8137258a550b49cae45d09e024821fb28d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:27 +0800 Subject: [PATCH 1087/2290] f2fs: compress: fix to guarantee persisting compressed blocks by CP [ Upstream commit 8a430dd49e9cb021372b0ad91e60aeef9c6ced00 ] If data block in compressed cluster is not persisted with metadata during checkpoint, after SPOR, the data may be corrupted, let's guarantee to write compressed page by checkpoint. Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 4 +++- fs/f2fs/data.c | 17 +++++++++-------- fs/f2fs/f2fs.h | 4 +++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7d85db7208e14..459bf10f297a2 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1437,6 +1437,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = (struct compress_io_ctx *)page_private(page); + enum count_type type = WB_DATA_TYPE(page, + f2fs_is_compressed_page(page)); int i; if (unlikely(bio->bi_status)) @@ -1444,7 +1446,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) f2fs_compress_free_page(page); - dec_page_count(sbi, F2FS_WB_DATA); + dec_page_count(sbi, type); if (atomic_dec_return(&cic->pending_pages)) return; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7b3eb192f9c03..6f649a60859fc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -50,7 +50,7 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -static bool __is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; @@ -67,8 +67,6 @@ static bool __is_cp_guaranteed(struct page *page) S_ISDIR(inode->i_mode)) return true; - if (f2fs_is_compressed_page(page)) - return false; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || page_private_gcing(page)) return true; @@ -327,7 +325,7 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page); + enum count_type type = WB_DATA_TYPE(page, false); if (page_private_dummy(page)) { clear_page_private_dummy(page); @@ -733,7 +731,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page)); + __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -941,7 +939,7 @@ alloc_new: if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); - inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -955,6 +953,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; + enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -984,7 +983,8 @@ next: /* set submitted = true as a return value */ fio->submitted = 1; - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + type = WB_DATA_TYPE(bio_page, fio->compressed_page); + inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, @@ -997,7 +997,8 @@ alloc_new: if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + dec_page_count(sbi, WB_DATA_TYPE(bio_page, + fio->compressed_page)); fio->retry = 1; goto skip; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1d78bca5037f4..16dacf811481c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1067,7 +1067,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(p, f) \ + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -3761,6 +3762,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_bio(struct f2fs_sb_info *sbi, -- GitLab From 542c8b3c774a480bfd0804291a12f6f2391b0cd1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:28 +0800 Subject: [PATCH 1088/2290] f2fs: compress: fix to cover normal cluster write with cp_rwsem [ Upstream commit fd244524c2cf07b5f4c3fe8abd6a99225c76544b ] When we overwrite compressed cluster w/ normal cluster, we should not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data will be corrupted if partial blocks were persisted before CP & SPOR, due to cluster metadata wasn't updated atomically. Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 27 ++++++++++++++++++--------- fs/f2fs/data.c | 3 ++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 459bf10f297a2..553950962842a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1462,12 +1462,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) } static int f2fs_write_raw_pages(struct compress_ctx *cc, - int *submitted, + int *submitted_p, struct writeback_control *wbc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret, i; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + int submitted, compr_blocks, i; + int ret = 0; compr_blocks = f2fs_compressed_blocks(cc); @@ -1482,6 +1484,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (compr_blocks < 0) return compr_blocks; + /* overwrite compressed cluster w/ normal cluster */ + if (compr_blocks > 0) + f2fs_lock_op(sbi); + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; @@ -1506,7 +1512,7 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { @@ -1514,26 +1520,29 @@ continue_unlock: unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + ret = 0; /* * for quota file, just redirty left pages to * avoid deadlock caused by cluster update race * from foreground operation. */ if (IS_NOQUOTA(cc->inode)) - return 0; - ret = 0; + goto out; f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); goto retry_write; } - return ret; + goto out; } - *submitted += _submitted; + *submitted_p += submitted; } - f2fs_balance_fs(F2FS_M_SB(mapping), true); +out: + if (compr_blocks > 0) + f2fs_unlock_op(sbi); - return 0; + f2fs_balance_fs(sbi, true); + return ret; } int f2fs_write_multi_pages(struct compress_ctx *cc, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6f649a60859fc..3f0ba71451c27 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2779,7 +2779,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, - .need_lock = LOCK_RETRY, + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, .post_read = f2fs_post_read_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, @@ -2859,6 +2859,7 @@ write: if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { + f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } -- GitLab From b5ecf59fbab99edb06a7387faa7949be631046e6 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 13 Jan 2024 03:41:29 +0800 Subject: [PATCH 1089/2290] f2fs: compress: fix to check unreleased compressed cluster [ Upstream commit eb8fbaa53374e0a2d4381190abfe708481517bbb ] Compressed cluster may not be released due to we can fail in release_compress_blocks(), fix to handle reserved compressed cluster correctly in reserve_compress_blocks(). Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Sheng Yong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 46e4960a9dcf7..6c9f03e3be8e9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3609,7 +3609,13 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) goto next; } - if (__is_valid_data_blkaddr(blkaddr)) { + /* + * compressed cluster was not released due to it + * fails in release_compress_blocks(), so NEW_ADDR + * is a possible case. + */ + if (blkaddr == NEW_ADDR || + __is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } @@ -3619,6 +3625,11 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) } reserved = cluster_size - compr_blocks; + + /* for the case all blocks in cluster were reserved */ + if (reserved == 1) + goto next; + ret = inc_valid_block_count(sbi, dn->inode, &reserved); if (ret) return ret; -- GitLab From 621da84f10154d47ab4d4d46ec2c7d4a0999b7c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Nov 2022 10:15:18 +0100 Subject: [PATCH 1090/2290] f2fs: simplify __allocate_data_block [ Upstream commit 3cf684f2f8e0229714fb6d051508b42d3320e78f ] Just use a simple if block for the conditional call to inc_valid_block_count. Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3f0ba71451c27..77b825fdeec85 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1420,13 +1420,12 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; dn->data_blkaddr = f2fs_data_blkaddr(dn); - if (dn->data_blkaddr != NULL_ADDR) - goto alloc; - - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) - return err; + if (dn->data_blkaddr == NULL_ADDR) { + err = inc_valid_block_count(sbi, dn->inode, &count); + if (unlikely(err)) + return err; + } -alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, -- GitLab From 17bfaa58db0c52f7e14210e76b3862162a66ae96 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:35 +0800 Subject: [PATCH 1091/2290] f2fs: delete obsolete FI_FIRST_BLOCK_WRITTEN [ Upstream commit a53936361330e4c55c0654605178281387d9c761 ] Commit 3c6c2bebef79 ("f2fs: avoid punch_hole overhead when releasing volatile data") introduced FI_FIRST_BLOCK_WRITTEN as below reason: This patch is to avoid some punch_hole overhead when releasing volatile data. If volatile data was not written yet, we just can make the first page as zero. After commit 7bc155fec5b3 ("f2fs: kill volatile write support"), we won't support volatile write, but it missed to remove obsolete FI_FIRST_BLOCK_WRITTEN, delete it in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 2 -- fs/f2fs/data.c | 2 -- fs/f2fs/f2fs.h | 6 ------ fs/f2fs/file.c | 3 --- fs/f2fs/gc.c | 2 -- fs/f2fs/inode.c | 25 ------------------------- 6 files changed, 40 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 553950962842a..5973fda2349c7 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1388,8 +1388,6 @@ unlock_continue: add_compr_block_stat(inode, cc->valid_nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); - if (cc->cluster_idx == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); if (quota_inode) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 77b825fdeec85..95e737fc75ef0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2739,8 +2739,6 @@ got_it: f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); out: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 16dacf811481c..2364b6f7500b2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,7 +764,6 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ @@ -3248,11 +3247,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline bool f2fs_is_first_block_written(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); -} - static inline bool f2fs_is_drop_cache(struct inode *inode) { return is_inode_flag_set(inode, FI_DROP_CACHE); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6c9f03e3be8e9..4d634b5b6011f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -602,9 +602,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) valid_blocks++; } - if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) - clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); - f2fs_invalidate_blocks(sbi, blkaddr); if (!released || blkaddr != COMPRESS_ADDR) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8161355658562..d4662ccb94c8f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1410,8 +1410,6 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0010579f17368..f0f2584fed66e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -74,20 +74,6 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static int __written_first_block(struct f2fs_sb_info *sbi, - struct f2fs_inode *ri) -{ - block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - - if (!__is_valid_data_blkaddr(addr)) - return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - return -EFSCORRUPTED; - } - return 0; -} - static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { int extra_size = get_extra_isize(inode); @@ -336,7 +322,6 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; - int err; /* Check if ino is within scope */ if (f2fs_check_nid_range(sbi, inode->i_ino)) @@ -417,16 +402,6 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (S_ISREG(inode->i_mode)) { - err = __written_first_block(sbi, ri); - if (err < 0) { - f2fs_put_page(node_page, 1); - return err; - } - if (!err) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); - } - if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; -- GitLab From 3c2e2c8059da54cab89bc6f7973cba821406a83e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:36 +0800 Subject: [PATCH 1092/2290] f2fs: delete obsolete FI_DROP_CACHE [ Upstream commit bb6e1c8fa5b9b95bbb8e39b6105f8f6550e070fc ] FI_DROP_CACHE was introduced in commit 1e84371ffeef ("f2fs: change atomic and volatile write policies") for volatile write feature, after commit 7bc155fec5b3 ("f2fs: kill volatile write support"), we won't support volatile write, let's delete related codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 3 --- fs/f2fs/f2fs.h | 6 ------ 2 files changed, 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 95e737fc75ef0..a5d0d0b892875 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2817,9 +2817,6 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, zero_user_segment(page, offset, PAGE_SIZE); write: - if (f2fs_is_drop_cache(inode)) - goto out; - /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2364b6f7500b2..0f7193a0ab422 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,7 +764,6 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ - FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ @@ -3247,11 +3246,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline bool f2fs_is_drop_cache(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_DROP_CACHE); -} - static inline void *inline_data_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); -- GitLab From ebe3a9f3c286ad376ed433b163c57462f939f18c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:37 +0800 Subject: [PATCH 1093/2290] f2fs: introduce get_dnode_addr() to clean up codes [ Upstream commit 2020cd48e41cb8470bb1ca0835033d13d3178425 ] Just cleanup, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 11 ++--------- fs/f2fs/f2fs.h | 18 +++++++++++++++--- fs/f2fs/file.c | 8 +------- fs/f2fs/inode.c | 32 ++++++++++++++------------------ 4 files changed, 32 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a5d0d0b892875..65b69d38f332c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1105,16 +1105,9 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, static void __set_data_blkaddr(struct dnode_of_data *dn) { - struct f2fs_node *rn = F2FS_NODE(dn->node_page); - __le32 *addr_array; - int base = 0; + __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - /* Get physical address of data block */ - addr_array = blkaddr_in_node(rn); - addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); + addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0f7193a0ab422..108ba10679da2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3246,12 +3246,13 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { - struct f2fs_inode *ri = F2FS_INODE(page); - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, page); - return (void *)&(ri->i_addr[extra_size + DEF_INLINE_RESERVED_SIZE]); + return (void *)(addr + DEF_INLINE_RESERVED_SIZE); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -3386,6 +3387,17 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + int base = 0; + + if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) + base = get_extra_isize(inode); + + return blkaddr_in_node(F2FS_NODE(node_page)) + base; +} + #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4d634b5b6011f..9a81d51ddf9e2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -560,20 +560,14 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; - int base = 0; bool compressed_cluster = false; int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + base + ofs; + addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; /* Assumption: truncateion starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f0f2584fed66e..869bb6ec107cc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -59,35 +59,31 @@ void f2fs_set_inode_flags(struct inode *inode) S_ENCRYPTED|S_VERITY|S_CASEFOLD); } -static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +static void __get_inode_rdev(struct inode *inode, struct page *node_page) { - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_page); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[extra_size]) - inode->i_rdev = old_decode_dev( - le32_to_cpu(ri->i_addr[extra_size])); + if (addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(addr[0])); else - inode->i_rdev = new_decode_dev( - le32_to_cpu(ri->i_addr[extra_size + 1])); + inode->i_rdev = new_decode_dev(le32_to_cpu(addr[1])); } } -static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +static void __set_inode_rdev(struct inode *inode, struct page *node_page) { - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_page); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[extra_size] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 1] = 0; + addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); + addr[1] = 0; } else { - ri->i_addr[extra_size] = 0; - ri->i_addr[extra_size + 1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 2] = 0; + addr[0] = 0; + addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); + addr[2] = 0; } } } @@ -400,7 +396,7 @@ static int do_read_inode(struct inode *inode) } /* get rdev by using inline_info */ - __get_inode_rdev(inode, ri); + __get_inode_rdev(inode, node_page); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; @@ -672,7 +668,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) } } - __set_inode_rdev(inode, ri); + __set_inode_rdev(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) -- GitLab From bba2a0ba00eb7244ca7a574bc13ff71321e9360d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 10 Dec 2023 17:20:38 +0800 Subject: [PATCH 1094/2290] f2fs: update blkaddr in __set_data_blkaddr() for cleanup [ Upstream commit 59d0d4c3eae0f3dd8886ed59f89f21fa09e324f5 ] This patch allows caller to pass blkaddr to f2fs_set_data_blkaddr() and let __set_data_blkaddr() inside f2fs_set_data_blkaddr() to update dn->data_blkaddr w/ last value of blkaddr. Just cleanup, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 54607494875e ("f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 13 ++++++------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 12 ++++-------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 65b69d38f332c..1aa7d443cf364 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1103,10 +1103,11 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return 0; } -static void __set_data_blkaddr(struct dnode_of_data *dn) +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); + dn->data_blkaddr = blkaddr; addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } @@ -1116,18 +1117,17 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) * ->node_page * update block addresses in the node page */ -void f2fs_set_data_blkaddr(struct dnode_of_data *dn) +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, blkaddr); if (set_page_dirty(dn->node_page)) dn->node_changed = true; } void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - dn->data_blkaddr = blkaddr; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, blkaddr); f2fs_update_read_extent_cache(dn); } @@ -1154,8 +1154,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { - dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, NEW_ADDR); count--; } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 108ba10679da2..7f34c7d0d156e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3781,7 +3781,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, sector_t *sector); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void f2fs_set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9a81d51ddf9e2..aa3ded9825f0c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -585,8 +585,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, @@ -1488,8 +1487,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, } f2fs_invalidate_blocks(sbi, dn->data_blkaddr); - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NEW_ADDR); } f2fs_update_read_extent_cache_range(dn, start, 0, index - start); @@ -3440,8 +3438,7 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (blkaddr != NEW_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); @@ -3611,8 +3608,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) continue; } - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NEW_ADDR); } reserved = cluster_size - compr_blocks; -- GitLab From 67eba3e674a19e242b38cac032789cb509662884 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:30 +0800 Subject: [PATCH 1095/2290] f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode [ Upstream commit 54607494875edd636aff3c21ace3ad9a7da758a9 ] In reserve_compress_blocks(), we update blkaddrs of dnode in prior to inc_valid_block_count(), it may cause inconsistent status bewteen i_blocks and blkaddrs once inc_valid_block_count() fails. To fix this issue, it needs to reverse their invoking order. Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 26 ++++++++++++++------------ fs/f2fs/segment.c | 2 +- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1aa7d443cf364..b83b8ac29f430 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1142,7 +1142,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, @@ -1413,7 +1414,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr == NULL_ADDR) { - err = inc_valid_block_count(sbi, dn->inode, &count); + err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7f34c7d0d156e..2afb91471b535 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2286,7 +2286,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { blkcnt_t diff = 0, release = 0; block_t avail_user_block_count; @@ -2327,6 +2327,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = 0; } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + goto enospc; + } + diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) diff = *count; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index aa3ded9825f0c..96b59c87f30c7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3587,14 +3587,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) blkcnt_t reserved; int ret; - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { - blkaddr = f2fs_data_blkaddr(dn); + for (i = 0; i < cluster_size; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); if (i == 0) { - if (blkaddr == COMPRESS_ADDR) - continue; - dn->ofs_in_node += cluster_size; - goto next; + if (blkaddr != COMPRESS_ADDR) { + dn->ofs_in_node += cluster_size; + goto next; + } + continue; } /* @@ -3607,8 +3609,6 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) compr_blocks++; continue; } - - f2fs_set_data_blkaddr(dn, NEW_ADDR); } reserved = cluster_size - compr_blocks; @@ -3617,12 +3617,14 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (reserved == 1) goto next; - ret = inc_valid_block_count(sbi, dn->inode, &reserved); - if (ret) + ret = inc_valid_block_count(sbi, dn->inode, &reserved, false); + if (unlikely(ret)) return ret; - if (reserved != cluster_size - compr_blocks) - return -ENOSPC; + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + if (f2fs_data_blkaddr(dn) == NULL_ADDR) + f2fs_set_data_blkaddr(dn, NEW_ADDR); + } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a3dabec1f216a..aa1ba2fdfe00d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -247,7 +247,7 @@ retry: } else { blkcnt_t count = 1; - err = inc_valid_block_count(sbi, inode, &count); + err = inc_valid_block_count(sbi, inode, &count, true); if (err) { f2fs_put_dnode(&dn); return err; -- GitLab From 98df108a9ab068b7b6fb8085802eb596cf98b482 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Jan 2024 10:23:13 +0800 Subject: [PATCH 1096/2290] f2fs: compress: fix to cover f2fs_disable_compressed_file() w/ i_sem [ Upstream commit 2f9420d3a94aeebd92db88f00f4f2f1a3bd3f6cf ] - f2fs_disable_compressed_file - check inode_has_data - f2fs_file_mmap - mkwrite - f2fs_get_block_locked : update metadata in compressed inode's disk layout - fi->i_flags &= ~F2FS_COMPR_FL - clear_inode_flag(inode, FI_COMPRESSED_FILE); we should use i_sem lock to prevent above race case. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2afb91471b535..a81091a5e282d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4367,15 +4367,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - if (!f2fs_compressed_file(inode)) + f2fs_down_write(&F2FS_I(inode)->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + } + if (f2fs_is_mmap_file(inode) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return false; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; } -- GitLab From fe4de493572a4263554903bf9c3afc5c196e15f0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Jan 2024 22:49:15 +0800 Subject: [PATCH 1097/2290] f2fs: fix to avoid potential panic during recovery [ Upstream commit 21ec68234826b1b54ab980a8df6e33c74cfbee58 ] During recovery, if FAULT_BLOCK is on, it is possible that f2fs_reserve_new_block() will return -ENOSPC during recovery, then it may trigger panic. Also, if fault injection rate is 1 and only FAULT_BLOCK fault type is on, it may encounter deadloop in loop of block reservation. Let's change as below to fix these issues: - remove bug_on() to avoid panic. - limit the loop count of block reservation to avoid potential deadloop. Fixes: 956fa1ddc132 ("f2fs: fix to check return value of f2fs_reserve_new_block()") Reported-by: Zhiguo Niu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/recovery.c | 33 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a81091a5e282d..f5d69893d2d92 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -74,6 +74,11 @@ struct f2fs_fault_info { extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 53a6487f91e44..f5efc37a2b513 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -582,6 +582,19 @@ truncate_out: return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page) { @@ -683,14 +696,8 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; continue; @@ -698,16 +705,8 @@ retry_dn: /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } -- GitLab From e85d53ebae882791b33ff0cd88e0905ef6f253eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:05:00 +0100 Subject: [PATCH 1098/2290] scsi: csiostor: Avoid function pointer casts [ Upstream commit 9f3dbcb5632d6876226031d552ef6163bb3ad215 ] csiostor uses function pointer casts to keep the csio_ln_ev state machine hidden, but this causes warnings about control flow integrity (KCFI) violations in clang-16 and higher: drivers/scsi/csiostor/csio_lnode.c:1098:33: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1098 | return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/csiostor/csio_lnode.c:1369:29: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1369 | if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/csiostor/csio_lnode.c:1373:29: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1373 | if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/csiostor/csio_lnode.c:1377:29: error: cast from 'void (*)(struct csio_lnode *, enum csio_ln_ev)' to 'csio_sm_state_t' (aka 'void (*)(void *, unsigned int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1377 | if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Move the enum into a shared header so the correct types can be used without the need for casts. Fixes: a3667aaed569 ("[SCSI] csiostor: Chelsio FCoE offload driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100518.457623-1-arnd@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/csiostor/csio_defs.h | 18 ++++++++++++++++-- drivers/scsi/csiostor/csio_lnode.c | 8 ++++---- drivers/scsi/csiostor/csio_lnode.h | 13 ------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/csiostor/csio_defs.h b/drivers/scsi/csiostor/csio_defs.h index c38017b4af982..e50e93e7fe5a1 100644 --- a/drivers/scsi/csiostor/csio_defs.h +++ b/drivers/scsi/csiostor/csio_defs.h @@ -73,7 +73,21 @@ csio_list_deleted(struct list_head *list) #define csio_list_prev(elem) (((struct list_head *)(elem))->prev) /* State machine */ -typedef void (*csio_sm_state_t)(void *, uint32_t); +struct csio_lnode; + +/* State machine evets */ +enum csio_ln_ev { + CSIO_LNE_NONE = (uint32_t)0, + CSIO_LNE_LINKUP, + CSIO_LNE_FAB_INIT_DONE, + CSIO_LNE_LINK_DOWN, + CSIO_LNE_DOWN_LINK, + CSIO_LNE_LOGO, + CSIO_LNE_CLOSE, + CSIO_LNE_MAX_EVENT, +}; + +typedef void (*csio_sm_state_t)(struct csio_lnode *ln, enum csio_ln_ev evt); struct csio_sm { struct list_head sm_list; @@ -83,7 +97,7 @@ struct csio_sm { static inline void csio_set_state(void *smp, void *state) { - ((struct csio_sm *)smp)->sm_state = (csio_sm_state_t)state; + ((struct csio_sm *)smp)->sm_state = state; } static inline void diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index d5ac938970232..5b3ffefae476d 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -1095,7 +1095,7 @@ csio_handle_link_down(struct csio_hw *hw, uint8_t portid, uint32_t fcfi, int csio_is_lnode_ready(struct csio_lnode *ln) { - return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)); + return (csio_get_state(ln) == csio_lns_ready); } /*****************************************************************************/ @@ -1366,15 +1366,15 @@ csio_free_fcfinfo(struct kref *kref) void csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str) { - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) { + if (csio_get_state(ln) == csio_lns_uninit) { strcpy(str, "UNINIT"); return; } - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) { + if (csio_get_state(ln) == csio_lns_ready) { strcpy(str, "READY"); return; } - if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) { + if (csio_get_state(ln) == csio_lns_offline) { strcpy(str, "OFFLINE"); return; } diff --git a/drivers/scsi/csiostor/csio_lnode.h b/drivers/scsi/csiostor/csio_lnode.h index 372a67d122d38..607698a0f0631 100644 --- a/drivers/scsi/csiostor/csio_lnode.h +++ b/drivers/scsi/csiostor/csio_lnode.h @@ -53,19 +53,6 @@ extern int csio_fcoe_rnodes; extern int csio_fdmi_enable; -/* State machine evets */ -enum csio_ln_ev { - CSIO_LNE_NONE = (uint32_t)0, - CSIO_LNE_LINKUP, - CSIO_LNE_FAB_INIT_DONE, - CSIO_LNE_LINK_DOWN, - CSIO_LNE_DOWN_LINK, - CSIO_LNE_LOGO, - CSIO_LNE_CLOSE, - CSIO_LNE_MAX_EVENT, -}; - - struct csio_fcf_info { struct list_head list; uint8_t priority; -- GitLab From 987bc93ecdaba7710cfd44959dc194453e2e59d0 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Mon, 19 Feb 2024 14:18:05 +0800 Subject: [PATCH 1099/2290] RDMA/hns: Fix mis-modifying default congestion control algorithm [ Upstream commit d20a7cf9f714f0763efb56f0f2eeca1cb91315ed ] Commit 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN") adds a check of congest control alorithm for UD. But that patch causes a problem: hr_dev->caps.congest_type is global, used by all QPs, so modifying this field to DCQCN for UD QPs causes other QPs unable to use any other algorithm except DCQCN. Revert the modification in commit 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN"). Add a new field cong_type to struct hns_roce_qp and configure DCQCN for UD QPs. Fixes: 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN") Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Luoyouming Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20240219061805.668170-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_device.h | 17 +++++++++-------- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 ++++++++++------ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1112afa0af552..8748b65c87ea7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -595,6 +595,13 @@ struct hns_roce_work { u32 queue_num; }; +enum hns_roce_cong_type { + CONG_TYPE_DCQCN, + CONG_TYPE_LDCP, + CONG_TYPE_HC3, + CONG_TYPE_DIP, +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -639,6 +646,7 @@ struct hns_roce_qp { struct list_head sq_node; /* all send qps are on a list */ struct hns_user_mmap_entry *dwqe_mmap_entry; u32 config; + enum hns_roce_cong_type cong_type; }; struct hns_roce_ib_iboe { @@ -710,13 +718,6 @@ struct hns_roce_eq_table { struct hns_roce_eq *eq; }; -enum cong_type { - CONG_TYPE_DCQCN, - CONG_TYPE_LDCP, - CONG_TYPE_HC3, - CONG_TYPE_DIP, -}; - struct hns_roce_caps { u64 fw_ver; u8 num_ports; @@ -847,7 +848,7 @@ struct hns_roce_caps { u16 default_aeq_period; u16 default_aeq_arm_st; u16 default_ceq_arm_st; - enum cong_type cong_type; + enum hns_roce_cong_type cong_type; }; enum hns_roce_device_state { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 58fbb1d3b7f41..d06b19e69a151 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4886,12 +4886,15 @@ static int check_cong_type(struct ib_qp *ibqp, struct hns_roce_congestion_algorithm *cong_alg) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - if (ibqp->qp_type == IB_QPT_UD) - hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI) + hr_qp->cong_type = CONG_TYPE_DCQCN; + else + hr_qp->cong_type = hr_dev->caps.cong_type; /* different congestion types match different configurations */ - switch (hr_dev->caps.cong_type) { + switch (hr_qp->cong_type) { case CONG_TYPE_DCQCN: cong_alg->alg_sel = CONG_DCQCN; cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; @@ -4919,8 +4922,8 @@ static int check_cong_type(struct ib_qp *ibqp, default: ibdev_warn(&hr_dev->ib_dev, "invalid type(%u) for congestion selection.\n", - hr_dev->caps.cong_type); - hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + hr_qp->cong_type); + hr_qp->cong_type = CONG_TYPE_DCQCN; cong_alg->alg_sel = CONG_DCQCN; cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; cong_alg->dip_vld = DIP_INVALID; @@ -4939,6 +4942,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_congestion_algorithm cong_field; struct ib_device *ibdev = ibqp->device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); u32 dip_idx = 0; int ret; @@ -4951,7 +4955,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return ret; hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id + - hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE); + hr_qp->cong_type * HNS_ROCE_CONG_SIZE); hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID); hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel); hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL); -- GitLab From aaa8e143bfe14c0c08512d3eb94ef38285251045 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Fri, 2 Feb 2024 19:53:13 -0800 Subject: [PATCH 1100/2290] RDMA/device: Fix a race between mad_client and cm_client init [ Upstream commit 7a8bccd8b29c321ac181369b42b04fecf05f98e2 ] The mad_client will be initialized in enable_device_and_get(), while the devices_rwsem will be downgraded to a read semaphore. There is a window that leads to the failed initialization for cm_client, since it can not get matched mad port from ib_mad_port_list, and the matched mad port will be added to the list after that. mad_client | cm_client ------------------|-------------------------------------------------------- ib_register_device| enable_device_and_get down_write(&devices_rwsem) xa_set_mark(&devices, DEVICE_REGISTERED) downgrade_write(&devices_rwsem) | |ib_cm_init |ib_register_client(&cm_client) |down_read(&devices_rwsem) |xa_for_each_marked (&devices, DEVICE_REGISTERED) |add_client_context |cm_add_one |ib_register_mad_agent |ib_get_mad_port |__ib_get_mad_port |list_for_each_entry(entry, &ib_mad_port_list, port_list) |return NULL |up_read(&devices_rwsem) | add_client_context| ib_mad_init_device| ib_mad_port_open | list_add_tail(&port_priv->port_list, &ib_mad_port_list) up_read(&devices_rwsem) | Fix it by using down_write(&devices_rwsem) in ib_register_client(). Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration") Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@sangfor.com.cn Suggested-by: Jason Gunthorpe Signed-off-by: Shifeng Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/device.c | 37 +++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3a9b9a28d858f..453188db39d83 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client) { int ret; - down_write(&clients_rwsem); + lockdep_assert_held(&clients_rwsem); /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in @@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client) client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) - goto out; + return ret; highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - -out: - up_write(&clients_rwsem); - return ret; + return 0; } static void remove_client_id(struct ib_client *client) @@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client) { struct ib_device *device; unsigned long index; + bool need_unreg = false; int ret; refcount_set(&client->uses, 1); init_completion(&client->uses_zero); + + /* + * The devices_rwsem is held in write mode to ensure that a racing + * ib_register_device() sees a consisent view of clients and devices. + */ + down_write(&devices_rwsem); + down_write(&clients_rwsem); ret = assign_client_id(client); if (ret) - return ret; + goto out; - down_read(&devices_rwsem); + need_unreg = true; xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { ret = add_client_context(device, client); - if (ret) { - up_read(&devices_rwsem); - ib_unregister_client(client); - return ret; - } + if (ret) + goto out; } - up_read(&devices_rwsem); - return 0; + ret = 0; +out: + up_write(&clients_rwsem); + up_write(&devices_rwsem); + if (need_unreg && ret) + ib_unregister_client(client); + return ret; } EXPORT_SYMBOL(ib_register_client); -- GitLab From b0455371cced8c1e6d26767cb0f013c1b219a076 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 21 Feb 2024 11:32:04 +0000 Subject: [PATCH 1101/2290] RDMA/rtrs-clt: Check strnlen return len in sysfs mpath_policy_store() [ Upstream commit 7a7b7f575a25aa68ee934ee8107294487efcb3fe ] strnlen() may return 0 (e.g. for "\0\n" string), it's better to check the result of strnlen() before using 'len - 1' expression for the 'buf' array index. Detected using the static analysis tool - Svace. Fixes: dc3b66a0ce70 ("RDMA/rtrs-clt: Add a minimum latency multipath policy") Signed-off-by: Alexey Kodanev Link: https://lore.kernel.org/r/20240221113204.147478-1-aleksei.kodanev@bell-sw.com Acked-by: Jack Wang Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index d3c436ead6946..4aa80c9388f05 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -133,7 +133,7 @@ static ssize_t mpath_policy_store(struct device *dev, /* distinguish "mi" and "min-latency" with length */ len = strnlen(buf, NAME_MAX); - if (buf[len - 1] == '\n') + if (len && buf[len - 1] == '\n') len--; if (!strncasecmp(buf, "round-robin", 11) || -- GitLab From 322eb43c0ea211713ecc40c12bddc0627d566ad7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Feb 2024 13:44:06 +0100 Subject: [PATCH 1102/2290] scsi: bfa: Fix function pointer type mismatch for hcb_qe->cbfn [ Upstream commit b69600231f751304db914c63b937f7098ed2895c ] Some callback functions used here take a boolean argument, others take a status argument. This breaks KCFI type checking, so clang now warns about the function pointer cast: drivers/scsi/bfa/bfad_bsg.c:2138:29: error: cast from 'void (*)(void *, enum bfa_status)' to 'bfa_cb_cbfn_t' (aka 'void (*)(void *, enum bfa_boolean)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] Assuming the code is actually correct here and the callers always match the argument types of the callee, rework this to replace the explicit cast with a union of the two pointer types. This does not change the behavior of the code, so if something is actually broken here, a larger rework may be necessary. Fixes: 37ea0558b87a ("[SCSI] bfa: Added support to collect and reset fcport stats") Fixes: 3ec4f2c8bff2 ("[SCSI] bfa: Added support to configure QOS and collect stats.") Reviewed-by: Kees Cook Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240222124433.2046570-1-arnd@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/bfa/bfa.h | 9 ++++++++- drivers/scsi/bfa/bfa_core.c | 4 +--- drivers/scsi/bfa/bfa_ioc.h | 8 ++++++-- drivers/scsi/bfa/bfad_bsg.c | 11 ++++------- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/bfa/bfa.h b/drivers/scsi/bfa/bfa.h index 7bd2ba1ad4d11..f30fe324e6ecc 100644 --- a/drivers/scsi/bfa/bfa.h +++ b/drivers/scsi/bfa/bfa.h @@ -20,7 +20,6 @@ struct bfa_s; typedef void (*bfa_isr_func_t) (struct bfa_s *bfa, struct bfi_msg_s *m); -typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status); /* * Interrupt message handlers @@ -437,4 +436,12 @@ struct bfa_cb_pending_q_s { (__qe)->data = (__data); \ } while (0) +#define bfa_pending_q_init_status(__qe, __cbfn, __cbarg, __data) do { \ + bfa_q_qe_init(&((__qe)->hcb_qe.qe)); \ + (__qe)->hcb_qe.cbfn_status = (__cbfn); \ + (__qe)->hcb_qe.cbarg = (__cbarg); \ + (__qe)->hcb_qe.pre_rmv = BFA_TRUE; \ + (__qe)->data = (__data); \ +} while (0) + #endif /* __BFA_H__ */ diff --git a/drivers/scsi/bfa/bfa_core.c b/drivers/scsi/bfa/bfa_core.c index 6846ca8f7313c..3438d0b8ba062 100644 --- a/drivers/scsi/bfa/bfa_core.c +++ b/drivers/scsi/bfa/bfa_core.c @@ -1907,15 +1907,13 @@ bfa_comp_process(struct bfa_s *bfa, struct list_head *comp_q) struct list_head *qe; struct list_head *qen; struct bfa_cb_qe_s *hcb_qe; - bfa_cb_cbfn_status_t cbfn; list_for_each_safe(qe, qen, comp_q) { hcb_qe = (struct bfa_cb_qe_s *) qe; if (hcb_qe->pre_rmv) { /* qe is invalid after return, dequeue before cbfn() */ list_del(qe); - cbfn = (bfa_cb_cbfn_status_t)(hcb_qe->cbfn); - cbfn(hcb_qe->cbarg, hcb_qe->fw_status); + hcb_qe->cbfn_status(hcb_qe->cbarg, hcb_qe->fw_status); } else hcb_qe->cbfn(hcb_qe->cbarg, BFA_TRUE); } diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 933a1c3890ff5..5e568d6d7b261 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -361,14 +361,18 @@ struct bfa_reqq_wait_s { void *cbarg; }; -typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete); +typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete); +typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status); /* * Generic BFA callback element. */ struct bfa_cb_qe_s { struct list_head qe; - bfa_cb_cbfn_t cbfn; + union { + bfa_cb_cbfn_status_t cbfn_status; + bfa_cb_cbfn_t cbfn; + }; bfa_boolean_t once; bfa_boolean_t pre_rmv; /* set for stack based qe(s) */ bfa_status_t fw_status; /* to access fw status in comp proc */ diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index be8dfbe13e904..524e4e6979c9f 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -2135,8 +2135,7 @@ bfad_iocmd_fcport_get_stats(struct bfad_s *bfad, void *cmd) struct bfa_cb_pending_q_s cb_qe; init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, &iocmd->stats); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats); spin_lock_irqsave(&bfad->bfad_lock, flags); iocmd->status = bfa_fcport_get_stats(&bfad->bfa, &cb_qe); spin_unlock_irqrestore(&bfad->bfad_lock, flags); @@ -2159,7 +2158,7 @@ bfad_iocmd_fcport_reset_stats(struct bfad_s *bfad, void *cmd) struct bfa_cb_pending_q_s cb_qe; init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, &fcomp, NULL); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL); spin_lock_irqsave(&bfad->bfad_lock, flags); iocmd->status = bfa_fcport_clear_stats(&bfad->bfa, &cb_qe); @@ -2443,8 +2442,7 @@ bfad_iocmd_qos_get_stats(struct bfad_s *bfad, void *cmd) struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa); init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, &iocmd->stats); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats); spin_lock_irqsave(&bfad->bfad_lock, flags); WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc)); @@ -2474,8 +2472,7 @@ bfad_iocmd_qos_reset_stats(struct bfad_s *bfad, void *cmd) struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa); init_completion(&fcomp.comp); - bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, - &fcomp, NULL); + bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL); spin_lock_irqsave(&bfad->bfad_lock, flags); WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc)); -- GitLab From 8ede73123cd594a3eb9595e325aa2b01d0fb06ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Feb 2024 00:08:18 +0800 Subject: [PATCH 1103/2290] f2fs: compress: fix to check zstd compress level correctly in mount option [ Upstream commit e39602da752cd1d0462e3fa04074146f6f2803f6 ] f2fs only support to config zstd compress level w/ a positive number due to layout design, but since commit e0c1b49f5b67 ("lib: zstd: Upgrade to latest upstream zstd version 1.4.10"), zstd supports negative compress level, so that zstd_min_clevel() may return a negative number, then w/ below mount option, .compress_level can be configed w/ a negative number, which is not allowed to f2fs, let's add check condition to avoid it. mount -o compress_algorithm=zstd:4294967295 /dev/sdx /mnt/f2fs Fixes: 00e120b5e4b5 ("f2fs: assign default compression level") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0c0d0671febea..c529ce5d986cc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -649,7 +649,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) #ifdef CONFIG_F2FS_FS_ZSTD static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) { - unsigned int level; + int level; int len = 4; if (strlen(str) == len) { @@ -663,9 +663,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) f2fs_info(sbi, "wrong format, e.g. :"); return -EINVAL; } - if (kstrtouint(str + 1, 10, &level)) + if (kstrtoint(str + 1, 10, &level)) return -EINVAL; + /* f2fs does not support negative compress level now */ + if (level < 0) { + f2fs_info(sbi, "do not support negative compress level: %d", level); + return -ERANGE; + } + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; -- GitLab From 8c0fe010cbe0ef6cb78c612cde9651dce3ad1b9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 24 Oct 2023 23:58:20 +0200 Subject: [PATCH 1104/2290] net: sunrpc: Fix an off by one in rpc_sockaddr2uaddr() [ Upstream commit d6f4de70f73a106986ee315d7d512539f2f3303a ] The intent is to check if the strings' are truncated or not. So, >= should be used instead of >, because strlcat() and snprintf() return the length of the output, excluding the trailing NULL. Fixes: a02d69261134 ("SUNRPC: Provide functions for managing universal addresses") Signed-off-by: Christophe JAILLET Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/addr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d435bffc61999..97ff11973c493 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -284,10 +284,10 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) } if (snprintf(portbuf, sizeof(portbuf), - ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + ".%u.%u", port >> 8, port & 0xff) >= (int)sizeof(portbuf)) return NULL; - if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >= sizeof(addrbuf)) return NULL; return kstrdup(addrbuf, gfp_flags); -- GitLab From 06e828b3f1b206de08ef520fc46a40b22e1869cb Mon Sep 17 00:00:00 2001 From: Jorge Mora Date: Thu, 25 Jan 2024 07:56:12 -0700 Subject: [PATCH 1105/2290] NFSv4.2: fix nfs4_listxattr kernel BUG at mm/usercopy.c:102 [ Upstream commit 251a658bbfceafb4d58c76b77682c8bf7bcfad65 ] A call to listxattr() with a buffer size = 0 returns the actual size of the buffer needed for a subsequent call. When size > 0, nfs4_listxattr() does not return an error because either generic_listxattr() or nfs4_listxattr_nfs4_label() consumes exactly all the bytes then size is 0 when calling nfs4_listxattr_nfs4_user() which then triggers the following kernel BUG: [ 99.403778] kernel BUG at mm/usercopy.c:102! [ 99.404063] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP [ 99.408463] CPU: 0 PID: 3310 Comm: python3 Not tainted 6.6.0-61.fc40.aarch64 #1 [ 99.415827] Call trace: [ 99.415985] usercopy_abort+0x70/0xa0 [ 99.416227] __check_heap_object+0x134/0x158 [ 99.416505] check_heap_object+0x150/0x188 [ 99.416696] __check_object_size.part.0+0x78/0x168 [ 99.416886] __check_object_size+0x28/0x40 [ 99.417078] listxattr+0x8c/0x120 [ 99.417252] path_listxattr+0x78/0xe0 [ 99.417476] __arm64_sys_listxattr+0x28/0x40 [ 99.417723] invoke_syscall+0x78/0x100 [ 99.417929] el0_svc_common.constprop.0+0x48/0xf0 [ 99.418186] do_el0_svc+0x24/0x38 [ 99.418376] el0_svc+0x3c/0x110 [ 99.418554] el0t_64_sync_handler+0x120/0x130 [ 99.418788] el0t_64_sync+0x194/0x198 [ 99.418994] Code: aa0003e3 d000a3e0 91310000 97f49bdb (d4210000) Issue is reproduced when generic_listxattr() returns 'system.nfs4_acl', thus calling lisxattr() with size = 16 will trigger the bug. Add check on nfs4_listxattr() to return ERANGE error when it is called with size > 0 and the return value is greater than size. Fixes: 012a211abd5d ("NFSv4.2: hook in the user extended attribute handlers") Signed-off-by: Jorge Mora Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ec3f0103e1a7f..7cc74f7451d67 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10592,29 +10592,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { ssize_t error, error2, error3; + size_t left = size; - error = generic_listxattr(dentry, list, size); + error = generic_listxattr(dentry, list, left); if (error < 0) return error; if (list) { list += error; - size -= error; + left -= error; } - error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); + error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left); if (error2 < 0) return error2; if (list) { list += error2; - size -= error2; + left -= error2; } - error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left); if (error3 < 0) return error3; - return error + error2 + error3; + error += error2 + error3; + if (size && error > size) + return -ERANGE; + return error; } static void nfs4_enable_swap(struct inode *inode) -- GitLab From 18631d43d487233237dc41e765d067d2915fae8b Mon Sep 17 00:00:00 2001 From: Jorge Mora Date: Thu, 25 Jan 2024 07:51:28 -0700 Subject: [PATCH 1106/2290] NFSv4.2: fix listxattr maximum XDR buffer size [ Upstream commit bcac8bff90a6ee1629f90669cdb9d28fb86049b0 ] Switch order of operations to avoid creating a short XDR buffer: e.g., buflen = 12, old xdrlen = 12, new xdrlen = 20. Having a short XDR buffer leads to lxa_maxcount be a few bytes less than what is needed to retrieve the whole list when using a buflen as returned by a call with size = 0: buflen = listxattr(path, NULL, 0); buf = malloc(buflen); buflen = listxattr(path, buf, buflen); For a file with one attribute (name = '123456'), the first call with size = 0 will return buflen = 12 ('user.123456\x00'). The second call with size = 12, sends LISTXATTRS with lxa_maxcount = 12 + 8 (cookie) + 4 (array count) = 24. The XDR buffer needs 8 (cookie) + 4 (array count) + 4 (name count) + 6 (name len) + 2 (padding) + 4 (eof) = 28 which is 4 bytes shorter than the lxa_maxcount provided in the call. Fixes: 04a5da690e8f ("NFSv4.2: define limits and sizes for user xattr handling") Signed-off-by: Jorge Mora Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs42.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index b59876b01a1e3..0282d93c8bccb 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -55,11 +55,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name); * They would be 7 bytes long in the eventual buffer ("user.x\0"), and * 8 bytes long XDR-encoded. * - * Include the trailing eof word as well. + * Include the trailing eof word as well and make the result a multiple + * of 4 bytes. */ static inline u32 nfs42_listxattr_xdrsize(u32 buflen) { - return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4; + u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4; + + return (size + 3) & ~3; } #endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ -- GitLab From 7486973accbc4a9046cdbcdf94028a9a7b67c890 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 19 Feb 2024 10:28:44 +0800 Subject: [PATCH 1107/2290] f2fs: compress: fix to check compress flag w/ .i_sem lock [ Upstream commit ea59b12ac69774c08aa95cd5b6100700ea0cce97 ] It needs to check compress flag w/ .i_sem lock, otherwise, compressed inode may be disabled after the check condition, it's not needed to set compress option on non-compress inode. Fixes: e1e8debec656 ("f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 96b59c87f30c7..be4cab941d299 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3967,16 +3967,20 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) sizeof(option))) return -EFAULT; - if (!f2fs_compressed_file(inode) || - option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || - option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || - option.algorithm >= COMPRESS_MAX) + if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) return -EINVAL; file_start_write(filp); inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; -- GitLab From b91d54a2c4aa7a9028499d4bf869215e10097e19 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Feb 2024 12:32:05 -0800 Subject: [PATCH 1108/2290] f2fs: check number of blocks in a current section [ Upstream commit 7af2df0f67a1469762e59be3726a803882d83f6f ] In cfd66bb715fd ("f2fs: fix deadloop in foreground GC"), we needed to check the number of blocks in a section instead of the segment. Fixes: cfd66bb715fd ("f2fs: fix deadloop in foreground GC") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f3951e8ad3948..aa9ad85e0901d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -586,23 +586,22 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, unsigned int node_blocks, unsigned int dent_blocks) { - unsigned int segno, left_blocks; + unsigned segno, left_blocks; int i; - /* check current node segment */ + /* check current node sections in the worst case. */ for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; - + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (node_blocks > left_blocks) return false; } - /* check current data segment */ + /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (dent_blocks > left_blocks) return false; return true; @@ -651,7 +650,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (free_secs > upper_secs) return false; - else if (free_secs <= lower_secs) + if (free_secs <= lower_secs) return true; return !curseg_space; } -- GitLab From aa25e54f44cb6969f4f39d2e1b5999bfc354f38e Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Wed, 28 Feb 2024 13:27:23 -0500 Subject: [PATCH 1109/2290] watchdog: stm32_iwdg: initialize default timeout [ Upstream commit dbd7c0088b7f44aa0b9276ed3449df075a7b5b54 ] The driver never sets a default timeout value, therefore it is initialized to zero. When CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is enabled, the watchdog is started during probe. The kernel is supposed to automatically ping the watchdog from this point until userspace takes over, but this does not happen if the configured timeout is zero. A zero timeout causes watchdog_need_worker() to return false, so the heartbeat worker does not run and the system therefore resets soon after the driver is probed. This patch fixes this by setting an arbitrary non-zero default timeout. The default could be read from the hardware instead, but I didn't see any reason to add this complexity. This has been tested on an STM32F746. Fixes: 85fdc63fe256 ("drivers: watchdog: stm32_iwdg: set WDOG_HW_RUNNING at probe") Signed-off-by: Ben Wolsieffer Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240228182723.12855-1-ben.wolsieffer@hefring.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/stm32_iwdg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index 570a71509d2a9..78d51deab87aa 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -21,6 +21,8 @@ #include #include +#define DEFAULT_TIMEOUT 10 + /* IWDG registers */ #define IWDG_KR 0x00 /* Key register */ #define IWDG_PR 0x04 /* Prescaler Register */ @@ -249,6 +251,7 @@ static int stm32_iwdg_probe(struct platform_device *pdev) wdd->parent = dev; wdd->info = &stm32_iwdg_info; wdd->ops = &stm32_iwdg_ops; + wdd->timeout = DEFAULT_TIMEOUT; wdd->min_timeout = DIV_ROUND_UP((RLR_MIN + 1) * PR_MIN, wdt->rate); wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * wdt->data->max_prescaler * 1000) / wdt->rate; -- GitLab From 449684e376a051ca15502d6ebc8f25932927dc6b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 15:35:38 +0800 Subject: [PATCH 1110/2290] f2fs: ro: compress: fix to avoid caching unaligned extent [ Upstream commit 4b99ecd304290c4ef55666a62c89dfb2dbf0b2cd ] Mapping info from dump.f2fs: i_addr[0x2d] cluster flag [0xfffffffe : 4294967294] i_addr[0x2e] [0x 10428 : 66600] i_addr[0x2f] [0x 10429 : 66601] i_addr[0x30] [0x 1042a : 66602] f2fs_io fiemap 37 1 /mnt/f2fs/disk-58390c8c.raw Previsouly, it missed to align fofs and ofs_in_node to cluster_size, result in adding incorrect read extent cache, fix it. Before: f2fs_update_read_extent_tree_range: dev = (253,48), ino = 5, pgofs = 37, len = 4, blkaddr = 66600, c_len = 3 After: f2fs_update_read_extent_tree_range: dev = (253,48), ino = 5, pgofs = 36, len = 4, blkaddr = 66600, c_len = 3 Fixes: 94afd6d6e525 ("f2fs: extent cache: support unaligned extent") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/compress.c | 10 ++++++---- fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/node.c | 20 ++++++++++++++------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5973fda2349c7..df6dfd7de6d0d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1843,16 +1843,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task) * check whether cluster blocks are contiguous, and add extent cache entry * only if cluster blocks are logically and physically contiguous. */ -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node) { - bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + bool compressed = data_blkaddr(dn->inode, dn->node_page, + ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) break; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f5d69893d2d92..5ae1c4aa3ae92 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4258,7 +4258,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); void f2fs_put_page_dic(struct page *page, bool in_task); -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4315,7 +4316,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } -static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5db6740d31364..fcf22a50ff5db 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -850,21 +850,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && f2fs_sb_has_readonly(sbi)) { - unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + unsigned int ofs_in_node = dn->ofs_in_node; + pgoff_t fofs = index; + unsigned int c_len; block_t blkaddr; + /* should align fofs and ofs_in_node to cluster_size */ + if (fofs % cluster_size) { + fofs = round_down(fofs, cluster_size); + ofs_in_node = round_down(ofs_in_node, cluster_size); + } + + c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); if (!c_len) goto out; - blkaddr = f2fs_data_blkaddr(dn); + blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node); if (blkaddr == COMPRESS_ADDR) blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + 1); + ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, - index, blkaddr, - F2FS_I(dn->inode)->i_cluster_size, - c_len); + fofs, blkaddr, cluster_size, c_len); } out: return 0; -- GitLab From 4d5e5a044b99c2c4e4b99262189f35efc862be3e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 Feb 2024 22:16:53 +0100 Subject: [PATCH 1111/2290] NFS: Fix an off by one in root_nfs_cat() [ Upstream commit 698ad1a538da0b6bf969cfee630b4e3a026afb87 ] The intent is to check if 'dest' is truncated or not. So, >= should be used instead of >, because strlcat() returns the length of 'dest' and 'src' excluding the trailing NULL. Fixes: 56463e50d1fc ("NFS: Use super.c for NFSROOT mount option parsing") Signed-off-by: Christophe JAILLET Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfsroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 620329b7e6aeb..0b1c1d2e076c1 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src, size_t len = strlen(dest); if (len && dest[len - 1] != ',') - if (strlcat(dest, ",", destlen) > destlen) + if (strlcat(dest, ",", destlen) >= destlen) return -1; - if (strlcat(dest, src, destlen) > destlen) + if (strlcat(dest, src, destlen) >= destlen) return -1; return 0; } -- GitLab From 02b661956605839dd2d909fa08119288bc98b0ce Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 13 Jun 2023 15:51:57 +0800 Subject: [PATCH 1112/2290] f2fs: convert to use sbi directly [ Upstream commit c3355ea9d82fe6b1a4226c9a7d311f9c5715b456 ] F2FS_I_SB(inode) is redundant. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: b7d797d241c1 ("f2fs: compress: relocate some judgments in f2fs_reserve_compress_blocks") Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index be4cab941d299..c787a3f408ab3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3462,7 +3462,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) int ret; int writecount; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3475,7 +3475,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3644,7 +3644,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) unsigned int reserved_blocks = 0; int ret; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3660,7 +3660,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -4070,7 +4070,7 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); @@ -4142,7 +4142,7 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); -- GitLab From a4e063d67e9341fbc08990d3fef8f6f4c1206326 Mon Sep 17 00:00:00 2001 From: Xiuhong Wang Date: Wed, 6 Mar 2024 11:47:45 +0800 Subject: [PATCH 1113/2290] f2fs: compress: relocate some judgments in f2fs_reserve_compress_blocks [ Upstream commit b7d797d241c154d73ec5523f87f3b06d4f299da1 ] The following f2fs_io test will get a "0" result instead of -EINVAL, unisoc # ./f2fs_io compress file unisoc # ./f2fs_io reserve_cblocks file 0 it's not reasonable, so the judgement of atomic_read(&F2FS_I(inode)->i_compr_blocks) should be placed after the judgement of is_inode_flag_set(inode, FI_COMPRESS_RELEASED). Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Signed-off-by: Xiuhong Wang Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c787a3f408ab3..5dbd874ba3d8d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3657,9 +3657,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3669,6 +3666,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } + if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) + goto unlock_inode; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -3715,7 +3715,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } unlock_inode: inode_unlock(inode); -out: mnt_drop_write_file(filp); if (ret >= 0) { -- GitLab From 889846dfc8ee2cf31148a44bfd2faeb2faadc685 Mon Sep 17 00:00:00 2001 From: Xiuhong Wang Date: Wed, 6 Mar 2024 11:47:46 +0800 Subject: [PATCH 1114/2290] f2fs: compress: fix reserve_cblocks counting error when out of space [ Upstream commit 2f6d721e14b69d6e1251f69fa238b48e8374e25f ] When a file only needs one direct_node, performing the following operations will cause the file to be unrepairable: unisoc # ./f2fs_io compress test.apk unisoc #df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.2M 100% /data unisoc # ./f2fs_io release_cblocks test.apk 924 unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 4.8M 100% /data unisoc # dd if=/dev/random of=file4 bs=1M count=3 3145728 bytes (3.0 M) copied, 0.025 s, 120 M/s unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.8M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk F2FS_IOC_RESERVE_COMPRESS_BLOCKS failed: No space left on device adb reboot unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 11M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk 0 This is because the file has only one direct_node. After returning to -ENOSPC, reserved_blocks += ret will not be executed. As a result, the reserved_blocks at this time is still 0, which is not the real number of reserved blocks. Therefore, fsck cannot be set to repair the file. After this patch, the fsck flag will be set to fix this problem. unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.8M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk F2FS_IOC_RESERVE_COMPRESS_BLOCKS failed: No space left on device adb reboot then fsck will be executed unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 11M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk 924 Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Signed-off-by: Xiuhong Wang Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5dbd874ba3d8d..2fbc8d89c600b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3561,10 +3561,10 @@ out: return ret; } -static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, + unsigned int *reserved_blocks) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - unsigned int reserved_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; block_t blkaddr; int i; @@ -3628,12 +3628,12 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - reserved_blocks += reserved; + *reserved_blocks += reserved; next: count -= cluster_size; } - return reserved_blocks; + return 0; } static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) @@ -3694,7 +3694,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, F2FS_I(inode)->i_cluster_size); - ret = reserve_compress_blocks(&dn, count); + ret = reserve_compress_blocks(&dn, count, &reserved_blocks); f2fs_put_dnode(&dn); @@ -3702,13 +3702,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; page_idx += count; - reserved_blocks += ret; } filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (ret >= 0) { + if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); @@ -3717,7 +3716,7 @@ unlock_inode: inode_unlock(inode); mnt_drop_write_file(filp); - if (ret >= 0) { + if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && atomic_read(&F2FS_I(inode)->i_compr_blocks)) { -- GitLab From 4fd4a210de9dc4fcaafcc571893d41039875a699 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 29 Jan 2024 16:36:26 +0530 Subject: [PATCH 1115/2290] perf/x86/amd/core: Avoid register reset when CPU is dead [ Upstream commit ad8c91282c95f801c37812d59d2d9eba6899b384 ] When bringing a CPU online, some of the PMC and LBR related registers are reset. The same is done when a CPU is taken offline although that is unnecessary. This currently happens in the "cpu_dead" callback which is also incorrect as the callback runs on a control CPU instead of the one that is being taken offline. This also affects hibernation and suspend to RAM on some platforms as reported in the link below. Fixes: 21d59e3e2c40 ("perf/x86/amd/core: Detect PerfMonV2 support") Reported-by: Mario Limonciello Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/550a026764342cf7e5812680e3e2b91fe662b5ac.1706526029.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 04f4b96dec6df..fd091b9dd7067 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,6 @@ static void amd_pmu_cpu_dead(int cpu) kfree(cpuhw->lbr_sel); cpuhw->lbr_sel = NULL; - amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; -- GitLab From 76426abf9b980b46983f97de8e5b25047b4c9863 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 Mar 2024 11:08:41 +0000 Subject: [PATCH 1116/2290] afs: Revert "afs: Hide silly-rename files from userspace" [ Upstream commit 0aec3847d044273733285dcff90afda89ad461d2 ] This reverts commit 57e9d49c54528c49b8bffe6d99d782ea051ea534. This undoes the hiding of .__afsXXXX silly-rename files. The problem with hiding them is that rm can't then manually delete them. This also reverts commit 5f7a07646655fb4108da527565dcdc80124b14c4 ("afs: Fix endless loop in directory parsing") as that's a bugfix for the above. Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace") Reported-by: Markus Suvanto Link: https://lists.infradead.org/pipermail/linux-afs/2024-February/008102.html Signed-off-by: David Howells Link: https://lore.kernel.org/r/3085695.1710328121@warthog.procyon.org.uk Reviewed-by: Jeffrey E Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/dir.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 6e2c967fae6fc..07dc4ec73520c 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -473,16 +473,6 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, continue; } - /* Don't expose silly rename entries to userspace. */ - if (nlen > 6 && - dire->u.name[0] == '.' && - ctx->actor != afs_lookup_filldir && - ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) { - ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); - continue; - } - /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), -- GitLab From 31db25e3141b20e2a76a9f219eeca52e3cab126c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Mar 2024 11:11:53 -0400 Subject: [PATCH 1117/2290] nfs: fix panic when nfs4_ff_layout_prepare_ds() fails [ Upstream commit 719fcafe07c12646691bd62d7f8d94d657fa0766 ] We've been seeing the following panic in production BUG: kernel NULL pointer dereference, address: 0000000000000065 PGD 2f485f067 P4D 2f485f067 PUD 2cc5d8067 PMD 0 RIP: 0010:ff_layout_cancel_io+0x3a/0x90 [nfs_layout_flexfiles] Call Trace: ? __die+0x78/0xc0 ? page_fault_oops+0x286/0x380 ? __rpc_execute+0x2c3/0x470 [sunrpc] ? rpc_new_task+0x42/0x1c0 [sunrpc] ? exc_page_fault+0x5d/0x110 ? asm_exc_page_fault+0x22/0x30 ? ff_layout_free_layoutreturn+0x110/0x110 [nfs_layout_flexfiles] ? ff_layout_cancel_io+0x3a/0x90 [nfs_layout_flexfiles] ? ff_layout_cancel_io+0x6f/0x90 [nfs_layout_flexfiles] pnfs_mark_matching_lsegs_return+0x1b0/0x360 [nfsv4] pnfs_error_mark_layout_for_return+0x9e/0x110 [nfsv4] ? ff_layout_send_layouterror+0x50/0x160 [nfs_layout_flexfiles] nfs4_ff_layout_prepare_ds+0x11f/0x290 [nfs_layout_flexfiles] ff_layout_pg_init_write+0xf0/0x1f0 [nfs_layout_flexfiles] __nfs_pageio_add_request+0x154/0x6c0 [nfs] nfs_pageio_add_request+0x26b/0x380 [nfs] nfs_do_writepage+0x111/0x1e0 [nfs] nfs_writepages_callback+0xf/0x30 [nfs] write_cache_pages+0x17f/0x380 ? nfs_pageio_init_write+0x50/0x50 [nfs] ? nfs_writepages+0x6d/0x210 [nfs] ? nfs_writepages+0x6d/0x210 [nfs] nfs_writepages+0x125/0x210 [nfs] do_writepages+0x67/0x220 ? generic_perform_write+0x14b/0x210 filemap_fdatawrite_wbc+0x5b/0x80 file_write_and_wait_range+0x6d/0xc0 nfs_file_fsync+0x81/0x170 [nfs] ? nfs_file_mmap+0x60/0x60 [nfs] __x64_sys_fsync+0x53/0x90 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Inspecting the core with drgn I was able to pull this >>> prog.crashed_thread().stack_trace()[0] #0 at 0xffffffffa079657a (ff_layout_cancel_io+0x3a/0x84) in ff_layout_cancel_io at fs/nfs/flexfilelayout/flexfilelayout.c:2021:27 >>> prog.crashed_thread().stack_trace()[0]['idx'] (u32)1 >>> prog.crashed_thread().stack_trace()[0]['flseg'].mirror_array[1].mirror_ds (struct nfs4_ff_layout_ds *)0xffffffffffffffed This is clear from the stack trace, we call nfs4_ff_layout_prepare_ds() which could error out initializing the mirror_ds, and then we go to clean it all up and our check is only for if (!mirror->mirror_ds). This is inconsistent with the rest of the users of mirror_ds, which have if (IS_ERR_OR_NULL(mirror_ds)) to keep from tripping over this exact scenario. Fix this up in ff_layout_cancel_io() to make sure we don't panic when we get an error. I also spot checked all the other instances of checking mirror_ds and we appear to be doing the correct checks everywhere, only unconditionally dereferencing mirror_ds when we know it would be valid. Signed-off-by: Josef Bacik Fixes: b739a5bd9d9f ("NFSv4/flexfiles: Cancel I/O if the layout is recalled or revoked") Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 81bbafab18a99..4376881be7918 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2016,7 +2016,7 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg) for (idx = 0; idx < flseg->mirror_array_cnt; idx++) { mirror = flseg->mirror_array[idx]; mirror_ds = mirror->mirror_ds; - if (!mirror_ds) + if (IS_ERR_OR_NULL(mirror_ds)) continue; ds = mirror->mirror_ds->ds; if (!ds) -- GitLab From b5a579ddf0bf52c0fe17ec85ea92644501125aba Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 1 Mar 2024 19:43:48 +0500 Subject: [PATCH 1118/2290] io_uring/net: correct the type of variable [ Upstream commit 86bcacc957fc2d0403aa0e652757eec59a5fd7ca ] The namelen is of type int. It shouldn't be made size_t which is unsigned. The signed number is needed for error checking before use. Fixes: c55978024d12 ("io_uring/net: move receive multishot out of the generic msghdr path") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240301144349.2807544-1-usama.anjum@collabora.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 9fc0ffb0b6c12..0d4ee3d738fbf 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -516,7 +516,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) static int io_recvmsg_mshot_prep(struct io_kiocb *req, struct io_async_msghdr *iomsg, - size_t namelen, size_t controllen) + int namelen, size_t controllen) { if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { -- GitLab From 5c594bdbd5199f81c9064d53ebb82ac081c449ea Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 14 Feb 2024 10:07:25 +0000 Subject: [PATCH 1119/2290] comedi: comedi_test: Prevent timers rescheduling during deletion commit f53641a6e849034a44bf80f50245a75d7a376025 upstream. The comedi_test devices have a couple of timers (ai_timer and ao_timer) that can be started to simulate hardware interrupts. Their expiry functions normally reschedule the timer. The driver code calls either del_timer_sync() or del_timer() to delete the timers from the queue, but does not currently prevent the timers from rescheduling themselves so synchronized deletion may be ineffective. Add a couple of boolean members (one for each timer: ai_timer_enable and ao_timer_enable) to the device private data structure to indicate whether the timers are allowed to reschedule themselves. Set the member to true when adding the timer to the queue, and to false when deleting the timer from the queue in the waveform_ai_cancel() and waveform_ao_cancel() functions. The del_timer_sync() function is also called from the waveform_detach() function, but the timer enable members will already be set to false when that function is called, so no change is needed there. Fixes: 403fe7f34e33 ("staging: comedi: comedi_test: fix timer race conditions") Cc: stable@vger.kernel.org # 4.4+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20240214100747.16203-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_test.c | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 0b5c0af1cebf0..626d53bf9146a 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -85,6 +85,8 @@ struct waveform_private { struct comedi_device *dev; /* parent comedi device */ u64 ao_last_scan_time; /* time of previous AO scan in usec */ unsigned int ao_scan_period; /* AO scan period in usec */ + bool ai_timer_enable:1; /* should AI timer be running? */ + bool ao_timer_enable:1; /* should AO timer be running? */ unsigned short ao_loopbacks[N_CHANS]; }; @@ -234,8 +236,12 @@ static void waveform_ai_timer(struct timer_list *t) time_increment = devpriv->ai_convert_time - now; else time_increment = 1; - mod_timer(&devpriv->ai_timer, - jiffies + usecs_to_jiffies(time_increment)); + spin_lock(&dev->spinlock); + if (devpriv->ai_timer_enable) { + mod_timer(&devpriv->ai_timer, + jiffies + usecs_to_jiffies(time_increment)); + } + spin_unlock(&dev->spinlock); } overrun: @@ -391,9 +397,12 @@ static int waveform_ai_cmd(struct comedi_device *dev, * Seem to need an extra jiffy here, otherwise timer expires slightly * early! */ + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = true; devpriv->ai_timer.expires = jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1; add_timer(&devpriv->ai_timer); + spin_unlock_bh(&dev->spinlock); return 0; } @@ -402,6 +411,9 @@ static int waveform_ai_cancel(struct comedi_device *dev, { struct waveform_private *devpriv = dev->private; + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ai_timer); @@ -493,8 +505,12 @@ static void waveform_ao_timer(struct timer_list *t) unsigned int time_inc = devpriv->ao_last_scan_time + devpriv->ao_scan_period - now; - mod_timer(&devpriv->ao_timer, - jiffies + usecs_to_jiffies(time_inc)); + spin_lock(&dev->spinlock); + if (devpriv->ao_timer_enable) { + mod_timer(&devpriv->ao_timer, + jiffies + usecs_to_jiffies(time_inc)); + } + spin_unlock(&dev->spinlock); } underrun: @@ -515,9 +531,12 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev, async->inttrig = NULL; devpriv->ao_last_scan_time = ktime_to_us(ktime_get()); + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = true; devpriv->ao_timer.expires = jiffies + usecs_to_jiffies(devpriv->ao_scan_period); add_timer(&devpriv->ao_timer); + spin_unlock_bh(&dev->spinlock); return 1; } @@ -602,6 +621,9 @@ static int waveform_ao_cancel(struct comedi_device *dev, struct waveform_private *devpriv = dev->private; s->async->inttrig = NULL; + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ao_timer); -- GitLab From 0e3aa17499641077c31cdf10bb1abb05454e5522 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Jun 2023 12:45:42 +0200 Subject: [PATCH 1120/2290] remoteproc: stm32: use correct format strings on 64-bit [ Upstream commit 03bd158e1535e68bcd2b1e095b0ebcad7c84bd20 ] With CONFIG_ARCH_STM32 making it into arch/arm64, a couple of format strings no longer work, since they rely on size_t being compatible with %x, or they print an 'int' using %z: drivers/remoteproc/stm32_rproc.c: In function 'stm32_rproc_mem_alloc': drivers/remoteproc/stm32_rproc.c:122:22: error: format '%x' expects argument of type 'unsigned int', but argument 5 has type 'size_t' {aka 'long unsigned int'} [-Werror=format=] drivers/remoteproc/stm32_rproc.c:122:40: note: format string is defined here 122 | dev_dbg(dev, "map memory: %pa+%x\n", &mem->dma, mem->len); | ~^ | | | unsigned int | %lx drivers/remoteproc/stm32_rproc.c:125:30: error: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'size_t' {aka 'long unsigned int'} [-Werror=format=] drivers/remoteproc/stm32_rproc.c:125:65: note: format string is defined here 125 | dev_err(dev, "Unable to map memory region: %pa+%x\n", | ~^ | | | unsigned int | %lx drivers/remoteproc/stm32_rproc.c: In function 'stm32_rproc_get_loaded_rsc_table': drivers/remoteproc/stm32_rproc.c:646:30: error: format '%zx' expects argument of type 'size_t', but argument 4 has type 'int' [-Werror=format=] drivers/remoteproc/stm32_rproc.c:646:66: note: format string is defined here 646 | dev_err(dev, "Unable to map memory region: %pa+%zx\n", | ~~^ | | | long unsigned int | %x Fix up all three instances to work across architectures, and enable compile testing for this driver to ensure it builds everywhere. Reviewed-by: Arnaud Pouliquen Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Arnd Bergmann Stable-dep-of: 32381bbccba4 ("remoteproc: stm32: Fix incorrect type in assignment for va") Signed-off-by: Sasha Levin --- drivers/remoteproc/Kconfig | 2 +- drivers/remoteproc/stm32_rproc.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 1660197866531..d93113b6ffaa1 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -313,7 +313,7 @@ config ST_SLIM_REMOTEPROC config STM32_RPROC tristate "STM32 remoteproc support" - depends on ARCH_STM32 + depends on ARCH_STM32 || COMPILE_TEST depends on REMOTEPROC select MAILBOX help diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 8746cbb1f168d..e432febf4337b 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -118,10 +118,10 @@ static int stm32_rproc_mem_alloc(struct rproc *rproc, struct device *dev = rproc->dev.parent; void *va; - dev_dbg(dev, "map memory: %pa+%x\n", &mem->dma, mem->len); + dev_dbg(dev, "map memory: %pad+%zx\n", &mem->dma, mem->len); va = ioremap_wc(mem->dma, mem->len); if (IS_ERR_OR_NULL(va)) { - dev_err(dev, "Unable to map memory region: %pa+%x\n", + dev_err(dev, "Unable to map memory region: %pad+0x%zx\n", &mem->dma, mem->len); return -ENOMEM; } @@ -627,7 +627,7 @@ stm32_rproc_get_loaded_rsc_table(struct rproc *rproc, size_t *table_sz) ddata->rsc_va = devm_ioremap_wc(dev, rsc_pa, RSC_TBL_SIZE); if (IS_ERR_OR_NULL(ddata->rsc_va)) { - dev_err(dev, "Unable to map memory region: %pa+%zx\n", + dev_err(dev, "Unable to map memory region: %pa+%x\n", &rsc_pa, RSC_TBL_SIZE); ddata->rsc_va = NULL; return ERR_PTR(-ENOMEM); -- GitLab From b4f425956362fe0a4de7216f34bbfc192a0fdc6a Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 17 Jan 2024 14:53:11 +0100 Subject: [PATCH 1121/2290] remoteproc: stm32: Fix incorrect type in assignment for va [ Upstream commit 32381bbccba4c21145c571701f8f7fb1d9b3a92e ] The sparse tool complains about the attribute conversion between a _iomem void * and a void *: stm32_rproc.c:122:12: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void *va @@ got void [noderef] __iomem * @@ stm32_rproc.c:122:12: sparse: expected void *va stm32_rproc.c:122:12: sparse: got void [noderef] __iomem * Add '__force' to explicitly specify that the cast is intentional. This conversion is necessary to cast to virtual addresses pointer,used, by the remoteproc core. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312150052.HCiNKlqB-lkp@intel.com/ Fixes: 13140de09cc2 ("remoteproc: stm32: add an ST stm32_rproc driver") Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20240117135312.3381936-2-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/stm32_rproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index e432febf4337b..722cf1cdc2cb0 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -119,7 +119,7 @@ static int stm32_rproc_mem_alloc(struct rproc *rproc, void *va; dev_dbg(dev, "map memory: %pad+%zx\n", &mem->dma, mem->len); - va = ioremap_wc(mem->dma, mem->len); + va = (__force void *)ioremap_wc(mem->dma, mem->len); if (IS_ERR_OR_NULL(va)) { dev_err(dev, "Unable to map memory region: %pad+0x%zx\n", &mem->dma, mem->len); @@ -136,7 +136,7 @@ static int stm32_rproc_mem_release(struct rproc *rproc, struct rproc_mem_entry *mem) { dev_dbg(rproc->dev.parent, "unmap memory: %pa\n", &mem->dma); - iounmap(mem->va); + iounmap((__force __iomem void *)mem->va); return 0; } -- GitLab From e660319fc67c50a9310873fde832f1c04b5a7bc9 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 17 Jan 2024 14:53:12 +0100 Subject: [PATCH 1122/2290] remoteproc: stm32: Fix incorrect type assignment returned by stm32_rproc_get_loaded_rsc_tablef [ Upstream commit c77b35ce66af25bdd6fde60b62e35b9b316ea5c2 ] The sparse tool complains about the remove of the _iomem attribute. stm32_rproc.c:660:17: warning: cast removes address space '__iomem' of expression Add '__force' to explicitly specify that the cast is intentional. This conversion is necessary to cast to addresses pointer, which are then managed by the remoteproc core as a pointer to a resource_table structure. Fixes: 8a471396d21c ("remoteproc: stm32: Move resource table setup to rproc_ops") Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20240117135312.3381936-3-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/stm32_rproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 722cf1cdc2cb0..385e931603ad3 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -641,7 +641,7 @@ done: * entire area by overwriting it with the initial values stored in rproc->clean_table. */ *table_sz = RSC_TBL_SIZE; - return (struct resource_table *)ddata->rsc_va; + return (__force struct resource_table *)ddata->rsc_va; } static const struct rproc_ops st_rproc_ops = { -- GitLab From 22ae3d106391b51234167d9eed4dbfc37a366583 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 23 Jan 2024 17:51:09 -0500 Subject: [PATCH 1123/2290] usb: phy: generic: Get the vbus supply [ Upstream commit 75fd6485cccef269ac9eb3b71cf56753341195ef ] While support for working with a vbus was added, the regulator was never actually gotten (despite what was documented). Fix this by actually getting the supply from the device tree. Fixes: 7acc9973e3c4 ("usb: phy: generic: add vbus support") Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240123225111.1629405-3-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/phy/phy-generic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 3dc5c04e7cbf9..953df04b40d40 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -265,6 +265,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } + nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); + if (PTR_ERR(nop->vbus_draw) == -ENODEV) + nop->vbus_draw = NULL; + if (IS_ERR(nop->vbus_draw)) + return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), + "could not get vbus regulator\n"); + nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); if (PTR_ERR(nop->vbus_draw) == -ENODEV) nop->vbus_draw = NULL; -- GitLab From 52af9897e787bdaffa839f5d6fb535448a3c559a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 22 Jan 2024 12:03:17 +0100 Subject: [PATCH 1124/2290] tty: vt: fix 20 vs 0x20 typo in EScsiignore [ Upstream commit 0e6a92f67c8a94707f7bb27ac29e2bdf3e7c167d ] The if (c >= 20 && c <= 0x3f) test added in commit 7a99565f8732 is wrong. 20 is DC4 in ascii and it makes no sense to consider that as the bottom limit. Instead, it should be 0x20 as in the other test in the commit above. This is supposed to NOT change anything as we handle interesting 20-0x20 asciis far before this if. So for sakeness, change to 0x20 (which is SPACE). Signed-off-by: "Jiri Slaby (SUSE)" Fixes: 7a99565f8732 ("vt: ignore csi sequences with intermediate characters.") Cc: Martin Hostettler Link: https://lore.kernel.org/all/ZaP45QY2WEsDqoxg@neutronstar.dyndns.org/ Tested-by: Helge Deller # parisc STI console Link: https://lore.kernel.org/r/20240122110401.7289-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 981d2bfcf9a5b..9e30ef2b6eb8c 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2515,7 +2515,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } return; case EScsiignore: - if (c >= 20 && c <= 0x3f) + if (c >= 0x20 && c <= 0x3f) return; vc->vc_state = ESnormal; return; -- GitLab From e955764b41676ebaba44b478f4e7e2afb43a0140 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 18 Jan 2024 10:22:01 -0500 Subject: [PATCH 1125/2290] serial: max310x: fix syntax error in IRQ error message [ Upstream commit 8ede8c6f474255b2213cccd7997b993272a8e2f9 ] Replace g with q. Helpful when grepping thru source code or logs for "request" keyword. Fixes: f65444187a66 ("serial: New serial driver MAX310X") Reviewed-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240118152213.2644269-6-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 163a89f84c9c2..444f89eb2d4b7 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1459,7 +1459,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty if (!ret) return 0; - dev_err(dev, "Unable to reguest IRQ %i\n", irq); + dev_err(dev, "Unable to request IRQ %i\n", irq); out_uart: for (i = 0; i < devtype->nr; i++) { -- GitLab From ad7362db2fd718f2c9d12eefa5cb50a0376fc328 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 19 Jan 2024 10:45:08 +0000 Subject: [PATCH 1126/2290] tty: serial: samsung: fix tx_empty() to return TIOCSER_TEMT [ Upstream commit 314c2b399288f0058a8c5b6683292cbde5f1531b ] The core expects for tx_empty() either TIOCSER_TEMT when the tx is empty or 0 otherwise. s3c24xx_serial_txempty_nofifo() might return 0x4, and at least uart_get_lsr_info() tries to clear exactly TIOCSER_TEMT (BIT(1)). Fix tx_empty() to return TIOCSER_TEMT. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Tudor Ambarus Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20240119104526.1221243-2-tudor.ambarus@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/samsung_tty.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index aa2c51b84116f..589daed19e625 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -996,11 +996,10 @@ static unsigned int s3c24xx_serial_tx_empty(struct uart_port *port) if ((ufstat & info->tx_fifomask) != 0 || (ufstat & info->tx_fifofull)) return 0; - - return 1; + return TIOCSER_TEMT; } - return s3c24xx_serial_txempty_nofifo(port); + return s3c24xx_serial_txempty_nofifo(port) ? TIOCSER_TEMT : 0; } /* no modem control lines */ -- GitLab From 8aa68d9fc22cca4ea6dab7eac2639f090811c248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 11 Jan 2024 12:56:36 +0100 Subject: [PATCH 1127/2290] arm64: dts: broadcom: bcmbca: bcm4908: drop invalid switch cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 27058b95fbb784406ea4c40b20caa3f04937140c ] Ethernet switch does not have addressable subnodes. This fixes: arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dtb: ethernet-switch@0: '#address-cells', '#size-cells' do not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/net/dsa/brcm,sf2.yaml# Fixes: 527a3ac9bdf8 ("arm64: dts: broadcom: bcm4908: describe internal switch") Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20240111115636.12095-1-zajec5@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi index df71348542064..a4c5a38905b03 100644 --- a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi @@ -180,9 +180,6 @@ brcm,num-gphy = <5>; brcm,num-rgmii-ports = <2>; - #address-cells = <1>; - #size-cells = <0>; - ports: ports { #address-cells = <1>; #size-cells = <0>; -- GitLab From c609ce8b3c6fcc3a89184727ae6375d27136528f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 3 Feb 2024 00:57:59 +0900 Subject: [PATCH 1128/2290] kconfig: fix infinite loop when expanding a macro at the end of file [ Upstream commit af8bbce92044dc58e4cc039ab94ee5d470a621f5 ] A macro placed at the end of a file with no newline causes an infinite loop. [Test Kconfig] $(info,hello) \ No newline at end of file I realized that flex-provided input() returns 0 instead of EOF when it reaches the end of a file. Fixes: 104daea149c4 ("kconfig: reference environment variables directly and remove 'option env='") Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/lexer.l | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index cc386e4436834..2c2b3e6f248ca 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -302,8 +302,11 @@ static char *expand_token(const char *in, size_t n) new_string(); append_string(in, n); - /* get the whole line because we do not know the end of token. */ - while ((c = input()) != EOF) { + /* + * get the whole line because we do not know the end of token. + * input() returns 0 (not EOF!) when it reachs the end of file. + */ + while ((c = input()) != 0) { if (c == '\n') { unput(c); break; -- GitLab From bc493a56abf85a264544b8c10b30838bcb3f8613 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 8 Jan 2024 12:19:06 +0000 Subject: [PATCH 1129/2290] hwtracing: hisi_ptt: Move type check to the beginning of hisi_ptt_pmu_event_init() [ Upstream commit 06226d120a28f146abd3637799958a4dc4dbb7a1 ] When perf_init_event() calls perf_try_init_event() to init pmu driver, searches for the next pmu driver only when the return value is -ENOENT. Therefore, hisi_ptt_pmu_event_init() needs to check the type at the beginning of the function. Otherwise, in the case of perf-task mode, perf_try_init_event() returns -EOPNOTSUPP and skips subsequent pmu drivers, causes perf_init_event() to fail. Fixes: ff0de066b463 ("hwtracing: hisi_ptt: Add trace function support for HiSilicon PCIe Tune and Trace device") Signed-off-by: Yang Jihong Reviewed-by: Yicong Yang Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240108121906.3514820-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/ptt/hisi_ptt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 8d8fa8e8afe04..20a9cddb3723a 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -654,6 +654,9 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) int ret; u32 val; + if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) + return -ENOENT; + if (event->cpu < 0) { dev_dbg(event->pmu->dev, "Per-task mode not supported\n"); return -EOPNOTSUPP; @@ -662,9 +665,6 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) if (event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; - if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) - return -ENOENT; - ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config); if (ret < 0) return ret; -- GitLab From bea2dfd5fa3f7a5d1ff8de5599efe846427fdbcd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Feb 2024 21:02:58 -0800 Subject: [PATCH 1130/2290] rtc: mt6397: select IRQ_DOMAIN instead of depending on it [ Upstream commit 544c42f798e1651dcb04fb0395219bf0f1c2607e ] IRQ_DOMAIN is a hidden (not user visible) symbol. Users cannot set it directly thru "make *config", so drivers should select it instead of depending on it if they need it. Relying on it being set for a dependency is risky. Consistently using "select" or "depends on" can also help reduce Kconfig circular dependency issues. Therefore, change the use of "depends on" for IRQ_DOMAIN to "select" for RTC_DRV_MT6397. Fixes: 04d3ba70a3c9 ("rtc: mt6397: add IRQ domain dependency") Cc: Arnd Bergmann Cc: Eddie Huang Cc: Sean Wang Cc: Matthias Brugger Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: linux-rtc@vger.kernel.org Cc: Marc Zyngier Cc: Philipp Zabel Cc: Peter Rosin Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20240213050258.6167-1-rdunlap@infradead.org Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index bb63edb507da4..87dc050ca004c 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1843,7 +1843,8 @@ config RTC_DRV_MT2712 config RTC_DRV_MT6397 tristate "MediaTek PMIC based RTC" - depends on MFD_MT6397 || (COMPILE_TEST && IRQ_DOMAIN) + depends on MFD_MT6397 || COMPILE_TEST + select IRQ_DOMAIN help This selects the MediaTek(R) RTC driver. RTC is part of MediaTek MT6397 PMIC. You should enable MT6397 PMIC MFD before select -- GitLab From d2b48ecc760ae540b298bcf88a050c8bceef9662 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Feb 2024 17:04:57 +0200 Subject: [PATCH 1131/2290] serial: 8250_exar: Don't remove GPIO device on suspend [ Upstream commit 73b5a5c00be39e23b194bad10e1ea8bb73eee176 ] It seems a copy&paste mistake that suspend callback removes the GPIO device. There is no counterpart of this action, means once suspended there is no more GPIO device available untile full unbind-bind cycle is performed. Remove suspicious GPIO device removal in suspend. Fixes: d0aeaa83f0b0 ("serial: exar: split out the exar code from 8250_pci") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240219150627.2101198-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_exar.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index dca1abe363248..55451ff846520 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -714,6 +714,7 @@ static void exar_pci_remove(struct pci_dev *pcidev) for (i = 0; i < priv->nr; i++) serial8250_unregister_port(priv->line[i]); + /* Ensure that every init quirk is properly torn down */ if (priv->board->exit) priv->board->exit(pcidev); } @@ -728,10 +729,6 @@ static int __maybe_unused exar_suspend(struct device *dev) if (priv->line[i] >= 0) serial8250_suspend_port(priv->line[i]); - /* Ensure that every init quirk is properly torn down */ - if (priv->board->exit) - priv->board->exit(pcidev); - return 0; } -- GitLab From 1d4ff6a8f85e78c1a8dec849e781f58162dbabbe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Mar 2024 10:04:48 +0300 Subject: [PATCH 1132/2290] staging: greybus: fix get_channel_from_mode() failure path [ Upstream commit 34164202a5827f60a203ca9acaf2d9f7d432aac8 ] The get_channel_from_mode() function is supposed to return the channel which matches the mode. But it has a bug where if it doesn't find a matching channel then it returns the last channel. It should return NULL instead. Also remove an unnecessary NULL check on "channel". Fixes: 2870b52bae4c ("greybus: lights: add lights implementation") Signed-off-by: Dan Carpenter Reviewed-by: Rui Miguel Silva Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/379c0cb4-39e0-4293-8a18-c7b1298e5420@moroto.mountain Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/greybus/light.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 87d36948c6106..c6bd86a5335ab 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -100,15 +100,15 @@ static struct led_classdev *get_channel_cdev(struct gb_channel *channel) static struct gb_channel *get_channel_from_mode(struct gb_light *light, u32 mode) { - struct gb_channel *channel = NULL; + struct gb_channel *channel; int i; for (i = 0; i < light->channels_count; i++) { channel = &light->channels[i]; - if (channel && channel->mode == mode) - break; + if (channel->mode == mode) + return channel; } - return channel; + return NULL; } static int __gb_lights_flash_intensity_set(struct gb_channel *channel, -- GitLab From 2ca629b90dc9a0b3d6c1125b905fd0e2c886a1f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Mar 2024 18:17:34 +0000 Subject: [PATCH 1133/2290] usb: gadget: net2272: Use irqflags in the call to net2272_probe_fin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 600556809f04eb3bbccd05218215dcd7b285a9a9 ] Currently the variable irqflags is being set but is not being used, it appears it should be used in the call to net2272_probe_fin rather than IRQF_TRIGGER_LOW being used. Kudos to Uwe Kleine-König for suggesting the fix. Cleans up clang scan build warning: drivers/usb/gadget/udc/net2272.c:2610:15: warning: variable 'irqflags' set but not used [-Wunused-but-set-variable] Fixes: ceb80363b2ec ("USB: net2272: driver for PLX NET2272 USB device controller") Signed-off-by: Colin Ian King Acked-by: Alan Stern Link: https://lore.kernel.org/r/20240307181734.2034407-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/net2272.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 538c1b9a28835..c42d5aa99e81a 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2650,7 +2650,7 @@ net2272_plat_probe(struct platform_device *pdev) goto err_req; } - ret = net2272_probe_fin(dev, IRQF_TRIGGER_LOW); + ret = net2272_probe_fin(dev, irqflags); if (ret) goto err_io; -- GitLab From d23e49f4e4c1dc6fc74a2007c58d16ce0f2431b6 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 5 Mar 2024 15:36:28 +0100 Subject: [PATCH 1134/2290] ASoC: rockchip: i2s-tdm: Fix inaccurate sampling rates [ Upstream commit 9e2ab4b18ebd46813fc3459207335af4d368e323 ] The sample rates set by the rockchip_i2s_tdm driver in master mode are inaccurate up to 5% in several cases, due to the driver logic to configure clocks and a nasty interaction with the Common Clock Framework. To understand what happens, here is the relevant section of the clock tree (slightly simplified), along with the names used in the driver: vpll0 _OR_ vpll1 "mclk_root" clk_i2s2_8ch_tx_src "mclk_parent" clk_i2s2_8ch_tx_mux clk_i2s2_8ch_tx "mclk" or "mclk_tx" This is what happens when playing back e.g. at 192 kHz using audio-graph-card (when recording the same applies, only s/tx/rx/): 0. at probe, rockchip_i2s_tdm_set_sysclk() stores the passed frequency in i2s_tdm->mclk_tx_freq (*) which is 50176000, and that is never modified afterwards 1. when playback is started, rockchip_i2s_tdm_hw_params() is called and does the following two calls 2. rockchip_i2s_tdm_calibrate_mclk(): 2a. selects mclk_root0 (vpll0) as a parent for mclk_parent (mclk_tx_src), which is OK because the vpll0 rate is a good for 192000 (and sumbultiple) rates 2b. sets the mclk_root frequency based on ppm calibration computations 2c. sets mclk_tx_src to 49152000 (= 256 * 192000), which is also OK as it is a multiple of the required bit clock 3. rockchip_i2s_tdm_set_mclk() 3a. calls clk_set_rate() to set the rate of mclk_tx (clk_i2s2_8ch_tx) to the value of i2s_tdm->mclk_tx_freq (*), i.e. 50176000 which is not a multiple of the sampling frequency -- this is not OK 3a1. clk_set_rate() reacts by reparenting clk_i2s2_8ch_tx_src to vpll1 -- this is not OK because the default vpll1 rate can be divided to get 44.1 kHz and related rates, not 192 kHz The result is that the driver does a lot of ad-hoc decisions about clocks and ends up in using the wrong parent at an unoptimal rate. Step 0 is one part of the problem: unless the card driver calls set_sysclk at each stream start, whatever rate is set in mclk_tx_freq during boot will be taken and used until reboot. Moreover the driver does not care if its value is not a multiple of any audio frequency. Another part of the problem is that the whole reparenting and clock rate setting logic is conflicting with the CCF algorithms to achieve largely the same goal: selecting the best parent and setting the closest clock rate. And it turns out that only calling once clk_set_rate() on clk_i2s2_8ch_tx picks the correct vpll and sets the correct rate. The fix is based on removing the custom logic in the driver to select the parent and set the various clocks, and just let the Clock Framework do it all. As a side effect, the set_sysclk() op becomes useless because we now let the CCF compute the appropriate value for the sampling rate. It also implies that the whole calibration logic is now dead code and so it is removed along with the "PCM Clock Compensation in PPM" kcontrol, which has always been broken anyway. The handling of the 4 optional clocks also becomes dead code and is removed. The actual rates have been tested playing 30 seconds of audio at various sampling rates before and after this change using sox: time play -r -n synth 30 sine 950 gain -3 The time reported in the table below is the 'real' value reported by the 'time' command in the above command line. rate before after --------- ------ ------ 8000 Hz 30.60s 30.63s 11025 Hz 30.45s 30.51s 16000 Hz 30.47s 30.50s 22050 Hz 30.78s 30.41s 32000 Hz 31.02s 30.43s 44100 Hz 30.78s 30.41s 48000 Hz 29.81s 30.45s 88200 Hz 30.78s 30.41s 96000 Hz 29.79s 30.42s 176400 Hz 27.40s 30.41s 192000 Hz 29.79s 30.42s While the tests are running the clock tree confirms that: * without the patch, vpll1 is always used and clk_i2s2_8ch_tx always produces 50176000 Hz, which cannot be divided for most audio rates except the slowest ones, generating inaccurate rates * with the patch: - for 192000 Hz vpll0 is used - for 176400 Hz vpll1 is used - clk_i2s2_8ch_tx always produces (256 * ) Hz Tested on the RK3308 using the internal audio codec. Fixes: 081068fd6414 ("ASoC: rockchip: add support for i2s-tdm controller") Signed-off-by: Luca Ceresoli Link: https://msgid.link/r/20240305-rk3308-audio-codec-v4-1-312acdbe628f@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/rockchip/rockchip_i2s_tdm.c | 352 +------------------------- 1 file changed, 6 insertions(+), 346 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 2550bd2a5e78c..2e36a97077b99 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -27,8 +27,6 @@ #define DEFAULT_MCLK_FS 256 #define CH_GRP_MAX 4 /* The max channel 8 / 2 */ #define MULTIPLEX_CH_MAX 10 -#define CLK_PPM_MIN -1000 -#define CLK_PPM_MAX 1000 #define TRCM_TXRX 0 #define TRCM_TX 1 @@ -55,20 +53,6 @@ struct rk_i2s_tdm_dev { struct clk *hclk; struct clk *mclk_tx; struct clk *mclk_rx; - /* The mclk_tx_src is parent of mclk_tx */ - struct clk *mclk_tx_src; - /* The mclk_rx_src is parent of mclk_rx */ - struct clk *mclk_rx_src; - /* - * The mclk_root0 and mclk_root1 are root parent and supplies for - * the different FS. - * - * e.g: - * mclk_root0 is VPLL0, used for FS=48000Hz - * mclk_root1 is VPLL1, used for FS=44100Hz - */ - struct clk *mclk_root0; - struct clk *mclk_root1; struct regmap *regmap; struct regmap *grf; struct snd_dmaengine_dai_dma_data capture_dma_data; @@ -78,19 +62,11 @@ struct rk_i2s_tdm_dev { struct rk_i2s_soc_data *soc_data; bool is_master_mode; bool io_multiplex; - bool mclk_calibrate; bool tdm_mode; - unsigned int mclk_rx_freq; - unsigned int mclk_tx_freq; - unsigned int mclk_root0_freq; - unsigned int mclk_root1_freq; - unsigned int mclk_root0_initial_freq; - unsigned int mclk_root1_initial_freq; unsigned int frame_width; unsigned int clk_trcm; unsigned int i2s_sdis[CH_GRP_MAX]; unsigned int i2s_sdos[CH_GRP_MAX]; - int clk_ppm; int refcount; spinlock_t lock; /* xfer lock */ bool has_playback; @@ -116,12 +92,6 @@ static void i2s_tdm_disable_unprepare_mclk(struct rk_i2s_tdm_dev *i2s_tdm) { clk_disable_unprepare(i2s_tdm->mclk_tx); clk_disable_unprepare(i2s_tdm->mclk_rx); - if (i2s_tdm->mclk_calibrate) { - clk_disable_unprepare(i2s_tdm->mclk_tx_src); - clk_disable_unprepare(i2s_tdm->mclk_rx_src); - clk_disable_unprepare(i2s_tdm->mclk_root0); - clk_disable_unprepare(i2s_tdm->mclk_root1); - } } /** @@ -144,29 +114,9 @@ static int i2s_tdm_prepare_enable_mclk(struct rk_i2s_tdm_dev *i2s_tdm) ret = clk_prepare_enable(i2s_tdm->mclk_rx); if (ret) goto err_mclk_rx; - if (i2s_tdm->mclk_calibrate) { - ret = clk_prepare_enable(i2s_tdm->mclk_tx_src); - if (ret) - goto err_mclk_rx; - ret = clk_prepare_enable(i2s_tdm->mclk_rx_src); - if (ret) - goto err_mclk_rx_src; - ret = clk_prepare_enable(i2s_tdm->mclk_root0); - if (ret) - goto err_mclk_root0; - ret = clk_prepare_enable(i2s_tdm->mclk_root1); - if (ret) - goto err_mclk_root1; - } return 0; -err_mclk_root1: - clk_disable_unprepare(i2s_tdm->mclk_root0); -err_mclk_root0: - clk_disable_unprepare(i2s_tdm->mclk_rx_src); -err_mclk_rx_src: - clk_disable_unprepare(i2s_tdm->mclk_tx_src); err_mclk_rx: clk_disable_unprepare(i2s_tdm->mclk_tx); err_mclk_tx: @@ -566,159 +516,6 @@ static void rockchip_i2s_tdm_xfer_resume(struct snd_pcm_substream *substream, I2S_XFER_RXS_START); } -static int rockchip_i2s_tdm_clk_set_rate(struct rk_i2s_tdm_dev *i2s_tdm, - struct clk *clk, unsigned long rate, - int ppm) -{ - unsigned long rate_target; - int delta, ret; - - if (ppm == i2s_tdm->clk_ppm) - return 0; - - if (ppm < 0) - delta = -1; - else - delta = 1; - - delta *= (int)div64_u64((u64)rate * (u64)abs(ppm) + 500000, - 1000000); - - rate_target = rate + delta; - - if (!rate_target) - return -EINVAL; - - ret = clk_set_rate(clk, rate_target); - if (ret) - return ret; - - i2s_tdm->clk_ppm = ppm; - - return 0; -} - -static int rockchip_i2s_tdm_calibrate_mclk(struct rk_i2s_tdm_dev *i2s_tdm, - struct snd_pcm_substream *substream, - unsigned int lrck_freq) -{ - struct clk *mclk_root; - struct clk *mclk_parent; - unsigned int mclk_root_freq; - unsigned int mclk_root_initial_freq; - unsigned int mclk_parent_freq; - unsigned int div, delta; - u64 ppm; - int ret; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - mclk_parent = i2s_tdm->mclk_tx_src; - else - mclk_parent = i2s_tdm->mclk_rx_src; - - switch (lrck_freq) { - case 8000: - case 16000: - case 24000: - case 32000: - case 48000: - case 64000: - case 96000: - case 192000: - mclk_root = i2s_tdm->mclk_root0; - mclk_root_freq = i2s_tdm->mclk_root0_freq; - mclk_root_initial_freq = i2s_tdm->mclk_root0_initial_freq; - mclk_parent_freq = DEFAULT_MCLK_FS * 192000; - break; - case 11025: - case 22050: - case 44100: - case 88200: - case 176400: - mclk_root = i2s_tdm->mclk_root1; - mclk_root_freq = i2s_tdm->mclk_root1_freq; - mclk_root_initial_freq = i2s_tdm->mclk_root1_initial_freq; - mclk_parent_freq = DEFAULT_MCLK_FS * 176400; - break; - default: - dev_err(i2s_tdm->dev, "Invalid LRCK frequency: %u Hz\n", - lrck_freq); - return -EINVAL; - } - - ret = clk_set_parent(mclk_parent, mclk_root); - if (ret) - return ret; - - ret = rockchip_i2s_tdm_clk_set_rate(i2s_tdm, mclk_root, - mclk_root_freq, 0); - if (ret) - return ret; - - delta = abs(mclk_root_freq % mclk_parent_freq - mclk_parent_freq); - ppm = div64_u64((uint64_t)delta * 1000000, (uint64_t)mclk_root_freq); - - if (ppm) { - div = DIV_ROUND_CLOSEST(mclk_root_initial_freq, mclk_parent_freq); - if (!div) - return -EINVAL; - - mclk_root_freq = mclk_parent_freq * round_up(div, 2); - - ret = clk_set_rate(mclk_root, mclk_root_freq); - if (ret) - return ret; - - i2s_tdm->mclk_root0_freq = clk_get_rate(i2s_tdm->mclk_root0); - i2s_tdm->mclk_root1_freq = clk_get_rate(i2s_tdm->mclk_root1); - } - - return clk_set_rate(mclk_parent, mclk_parent_freq); -} - -static int rockchip_i2s_tdm_set_mclk(struct rk_i2s_tdm_dev *i2s_tdm, - struct snd_pcm_substream *substream, - struct clk **mclk) -{ - unsigned int mclk_freq; - int ret; - - if (i2s_tdm->clk_trcm) { - if (i2s_tdm->mclk_tx_freq != i2s_tdm->mclk_rx_freq) { - dev_err(i2s_tdm->dev, - "clk_trcm, tx: %d and rx: %d should be the same\n", - i2s_tdm->mclk_tx_freq, - i2s_tdm->mclk_rx_freq); - return -EINVAL; - } - - ret = clk_set_rate(i2s_tdm->mclk_tx, i2s_tdm->mclk_tx_freq); - if (ret) - return ret; - - ret = clk_set_rate(i2s_tdm->mclk_rx, i2s_tdm->mclk_rx_freq); - if (ret) - return ret; - - /* mclk_rx is also ok. */ - *mclk = i2s_tdm->mclk_tx; - } else { - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - *mclk = i2s_tdm->mclk_tx; - mclk_freq = i2s_tdm->mclk_tx_freq; - } else { - *mclk = i2s_tdm->mclk_rx; - mclk_freq = i2s_tdm->mclk_rx_freq; - } - - ret = clk_set_rate(*mclk, mclk_freq); - if (ret) - return ret; - } - - return 0; -} - static int rockchip_i2s_ch_to_io(unsigned int ch, bool substream_capture) { if (substream_capture) { @@ -849,19 +646,17 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct rk_i2s_tdm_dev *i2s_tdm = to_info(dai); - struct clk *mclk; - int ret = 0; unsigned int val = 0; unsigned int mclk_rate, bclk_rate, div_bclk = 4, div_lrck = 64; + int err; if (i2s_tdm->is_master_mode) { - if (i2s_tdm->mclk_calibrate) - rockchip_i2s_tdm_calibrate_mclk(i2s_tdm, substream, - params_rate(params)); + struct clk *mclk = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? + i2s_tdm->mclk_tx : i2s_tdm->mclk_rx; - ret = rockchip_i2s_tdm_set_mclk(i2s_tdm, substream, &mclk); - if (ret) - return ret; + err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); + if (err) + return err; mclk_rate = clk_get_rate(mclk); bclk_rate = i2s_tdm->frame_width * params_rate(params); @@ -969,96 +764,6 @@ static int rockchip_i2s_tdm_trigger(struct snd_pcm_substream *substream, return 0; } -static int rockchip_i2s_tdm_set_sysclk(struct snd_soc_dai *cpu_dai, int stream, - unsigned int freq, int dir) -{ - struct rk_i2s_tdm_dev *i2s_tdm = to_info(cpu_dai); - - /* Put set mclk rate into rockchip_i2s_tdm_set_mclk() */ - if (i2s_tdm->clk_trcm) { - i2s_tdm->mclk_tx_freq = freq; - i2s_tdm->mclk_rx_freq = freq; - } else { - if (stream == SNDRV_PCM_STREAM_PLAYBACK) - i2s_tdm->mclk_tx_freq = freq; - else - i2s_tdm->mclk_rx_freq = freq; - } - - dev_dbg(i2s_tdm->dev, "The target mclk_%s freq is: %d\n", - stream ? "rx" : "tx", freq); - - return 0; -} - -static int rockchip_i2s_tdm_clk_compensation_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = 1; - uinfo->value.integer.min = CLK_PPM_MIN; - uinfo->value.integer.max = CLK_PPM_MAX; - uinfo->value.integer.step = 1; - - return 0; -} - -static int rockchip_i2s_tdm_clk_compensation_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); - struct rk_i2s_tdm_dev *i2s_tdm = snd_soc_dai_get_drvdata(dai); - - ucontrol->value.integer.value[0] = i2s_tdm->clk_ppm; - - return 0; -} - -static int rockchip_i2s_tdm_clk_compensation_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); - struct rk_i2s_tdm_dev *i2s_tdm = snd_soc_dai_get_drvdata(dai); - int ret = 0, ppm = 0; - int changed = 0; - unsigned long old_rate; - - if (ucontrol->value.integer.value[0] < CLK_PPM_MIN || - ucontrol->value.integer.value[0] > CLK_PPM_MAX) - return -EINVAL; - - ppm = ucontrol->value.integer.value[0]; - - old_rate = clk_get_rate(i2s_tdm->mclk_root0); - ret = rockchip_i2s_tdm_clk_set_rate(i2s_tdm, i2s_tdm->mclk_root0, - i2s_tdm->mclk_root0_freq, ppm); - if (ret) - return ret; - if (old_rate != clk_get_rate(i2s_tdm->mclk_root0)) - changed = 1; - - if (clk_is_match(i2s_tdm->mclk_root0, i2s_tdm->mclk_root1)) - return changed; - - old_rate = clk_get_rate(i2s_tdm->mclk_root1); - ret = rockchip_i2s_tdm_clk_set_rate(i2s_tdm, i2s_tdm->mclk_root1, - i2s_tdm->mclk_root1_freq, ppm); - if (ret) - return ret; - if (old_rate != clk_get_rate(i2s_tdm->mclk_root1)) - changed = 1; - - return changed; -} - -static struct snd_kcontrol_new rockchip_i2s_tdm_compensation_control = { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, - .name = "PCM Clock Compensation in PPM", - .info = rockchip_i2s_tdm_clk_compensation_info, - .get = rockchip_i2s_tdm_clk_compensation_get, - .put = rockchip_i2s_tdm_clk_compensation_put, -}; - static int rockchip_i2s_tdm_dai_probe(struct snd_soc_dai *dai) { struct rk_i2s_tdm_dev *i2s_tdm = snd_soc_dai_get_drvdata(dai); @@ -1068,9 +773,6 @@ static int rockchip_i2s_tdm_dai_probe(struct snd_soc_dai *dai) if (i2s_tdm->has_playback) dai->playback_dma_data = &i2s_tdm->playback_dma_data; - if (i2s_tdm->mclk_calibrate) - snd_soc_add_dai_controls(dai, &rockchip_i2s_tdm_compensation_control, 1); - return 0; } @@ -1110,7 +812,6 @@ static int rockchip_i2s_tdm_set_bclk_ratio(struct snd_soc_dai *dai, static const struct snd_soc_dai_ops rockchip_i2s_tdm_dai_ops = { .hw_params = rockchip_i2s_tdm_hw_params, .set_bclk_ratio = rockchip_i2s_tdm_set_bclk_ratio, - .set_sysclk = rockchip_i2s_tdm_set_sysclk, .set_fmt = rockchip_i2s_tdm_set_fmt, .set_tdm_slot = rockchip_dai_tdm_slot, .trigger = rockchip_i2s_tdm_trigger, @@ -1433,35 +1134,6 @@ static void rockchip_i2s_tdm_path_config(struct rk_i2s_tdm_dev *i2s_tdm, rockchip_i2s_tdm_tx_path_config(i2s_tdm, num); } -static int rockchip_i2s_tdm_get_calibrate_mclks(struct rk_i2s_tdm_dev *i2s_tdm) -{ - int num_mclks = 0; - - i2s_tdm->mclk_tx_src = devm_clk_get(i2s_tdm->dev, "mclk_tx_src"); - if (!IS_ERR(i2s_tdm->mclk_tx_src)) - num_mclks++; - - i2s_tdm->mclk_rx_src = devm_clk_get(i2s_tdm->dev, "mclk_rx_src"); - if (!IS_ERR(i2s_tdm->mclk_rx_src)) - num_mclks++; - - i2s_tdm->mclk_root0 = devm_clk_get(i2s_tdm->dev, "mclk_root0"); - if (!IS_ERR(i2s_tdm->mclk_root0)) - num_mclks++; - - i2s_tdm->mclk_root1 = devm_clk_get(i2s_tdm->dev, "mclk_root1"); - if (!IS_ERR(i2s_tdm->mclk_root1)) - num_mclks++; - - if (num_mclks < 4 && num_mclks != 0) - return -ENOENT; - - if (num_mclks == 4) - i2s_tdm->mclk_calibrate = 1; - - return 0; -} - static int rockchip_i2s_tdm_path_prepare(struct rk_i2s_tdm_dev *i2s_tdm, struct device_node *np, bool is_rx_path) @@ -1609,11 +1281,6 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) i2s_tdm->io_multiplex = of_property_read_bool(node, "rockchip,io-multiplex"); - ret = rockchip_i2s_tdm_get_calibrate_mclks(i2s_tdm); - if (ret) - return dev_err_probe(i2s_tdm->dev, ret, - "mclk-calibrate clocks missing"); - regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(regs)) { return dev_err_probe(i2s_tdm->dev, PTR_ERR(regs), @@ -1666,13 +1333,6 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) goto err_disable_hclk; } - if (i2s_tdm->mclk_calibrate) { - i2s_tdm->mclk_root0_initial_freq = clk_get_rate(i2s_tdm->mclk_root0); - i2s_tdm->mclk_root1_initial_freq = clk_get_rate(i2s_tdm->mclk_root1); - i2s_tdm->mclk_root0_freq = i2s_tdm->mclk_root0_initial_freq; - i2s_tdm->mclk_root1_freq = i2s_tdm->mclk_root1_initial_freq; - } - pm_runtime_enable(&pdev->dev); regmap_update_bits(i2s_tdm->regmap, I2S_DMACR, I2S_DMACR_TDL_MASK, -- GitLab From d2034a6b92cafb70097eb363edad77f394e0079b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 11 Mar 2024 17:20:37 +1000 Subject: [PATCH 1135/2290] nouveau: reset the bo resource bus info after an eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f35c9af45ea7a4b1115b193d84858b14d13517fc ] Later attempts to refault the bo won't happen and the whole GPU does to lunch. I think Christian's refactoring of this code out to the driver broke this not very well tested path. Fixes: 141b15e59175 ("drm/nouveau: move io_reserve_lru handling into the driver v5") Cc: Christian König Signed-off-by: Dave Airlie Acked-by: Christian König Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240311072037.287905-1-airlied@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 126b3c6e12f99..f2dca41e46c5f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1194,6 +1194,8 @@ out: drm_vma_node_unmap(&nvbo->bo.base.vma_node, bdev->dev_mapping); nouveau_ttm_io_mem_free_locked(drm, nvbo->bo.resource); + nvbo->bo.resource->bus.offset = 0; + nvbo->bo.resource->bus.addr = NULL; goto retry; } -- GitLab From 6df9cf77e0c7c2fe246844ddc545a6f8d3704865 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Mar 2024 12:01:21 -0800 Subject: [PATCH 1136/2290] tcp: Fix NEW_SYN_RECV handling in inet_twsk_purge() [ Upstream commit 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 ] inet_twsk_purge() uses rcu to find TIME_WAIT and NEW_SYN_RECV objects to purge. These objects use SLAB_TYPESAFE_BY_RCU semantic and need special care. We need to use refcount_inc_not_zero(&sk->sk_refcnt). Reuse the existing correct logic I wrote for TIME_WAIT, because both structures have common locations for sk_state, sk_family, and netns pointer. If after the refcount_inc_not_zero() the object fields longer match the keys, use sock_gen_put(sk) to release the refcount. Then we can call inet_twsk_deschedule_put() for TIME_WAIT, inet_csk_reqsk_queue_drop_and_put() for NEW_SYN_RECV sockets, with BH disabled. Then we need to restart the loop because we had drop rcu_read_lock(). Fixes: 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()") Link: https://lore.kernel.org/netdev/CANn89iLvFuuihCtt9PME2uS1WJATnf5fKjDToa1WzVnRzHnPfg@mail.gmail.com/T/#u Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308200122.64357-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_timewait_sock.c | 41 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1d77d992e6e77..340a8f0c29800 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -281,12 +281,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); +/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { - struct inet_timewait_sock *tw; - struct sock *sk; struct hlist_nulls_node *node; unsigned int slot; + struct sock *sk; for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; @@ -295,38 +295,35 @@ restart_rcu: rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) { - /* A kernel listener socket might not hold refcnt for net, - * so reqsk_timer_handler() could be fired after net is - * freed. Userspace listener and reqsk never exist here. - */ - if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV && - hashinfo->pernet)) { - struct request_sock *req = inet_reqsk(sk); - - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); - } + int state = inet_sk_state_load(sk); + if ((1 << state) & ~(TCPF_TIME_WAIT | + TCPF_NEW_SYN_RECV)) continue; - } - tw = inet_twsk(sk); - if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count)) + if (sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count)) continue; - if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) continue; - if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count))) { - inet_twsk_put(tw); + if (unlikely(sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count))) { + sock_gen_put(sk); goto restart; } rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule_put(tw); + if (state == TCP_TIME_WAIT) { + inet_twsk_deschedule_put(inet_twsk(sk)); + } else { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + } local_bh_enable(); goto restart_rcu; } -- GitLab From 9905a157048f441f1412e7bd13372f4a971d75c6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 8 Mar 2024 12:01:22 -0800 Subject: [PATCH 1137/2290] rds: tcp: Fix use-after-free of net in reqsk_timer_handler(). [ Upstream commit 2a750d6a5b365265dbda33330a6188547ddb5c24 ] syzkaller reported a warning of netns tracker [0] followed by KASAN splat [1] and another ref tracker warning [1]. syzkaller could not find a repro, but in the log, the only suspicious sequence was as follows: 18:26:22 executing program 1: r0 = socket$inet6_mptcp(0xa, 0x1, 0x106) ... connect$inet6(r0, &(0x7f0000000080)={0xa, 0x4001, 0x0, @loopback}, 0x1c) (async) The notable thing here is 0x4001 in connect(), which is RDS_TCP_PORT. So, the scenario would be: 1. unshare(CLONE_NEWNET) creates a per netns tcp listener in rds_tcp_listen_init(). 2. syz-executor connect()s to it and creates a reqsk. 3. syz-executor exit()s immediately. 4. netns is dismantled. [0] 5. reqsk timer is fired, and UAF happens while freeing reqsk. [1] 6. listener is freed after RCU grace period. [2] Basically, reqsk assumes that the listener guarantees netns safety until all reqsk timers are expired by holding the listener's refcount. However, this was not the case for kernel sockets. Commit 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()") fixed this issue only for per-netns ehash. Let's apply the same fix for the global ehash. [0]: ref_tracker: net notrefcnt@0000000065449cc3 has 1/1 users at sk_alloc (./include/net/net_namespace.h:337 net/core/sock.c:2146) inet6_create (net/ipv6/af_inet6.c:192 net/ipv6/af_inet6.c:119) __sock_create (net/socket.c:1572) rds_tcp_listen_init (net/rds/tcp_listen.c:279) rds_tcp_init_net (net/rds/tcp.c:577) ops_init (net/core/net_namespace.c:137) setup_net (net/core/net_namespace.c:340) copy_net_ns (net/core/net_namespace.c:497) create_new_namespaces (kernel/nsproxy.c:110) unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) ksys_unshare (kernel/fork.c:3429) __x64_sys_unshare (kernel/fork.c:3496) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) ... WARNING: CPU: 0 PID: 27 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179) [1]: BUG: KASAN: slab-use-after-free in inet_csk_reqsk_queue_drop (./include/net/inet_hashtables.h:180 net/ipv4/inet_connection_sock.c:952 net/ipv4/inet_connection_sock.c:966) Read of size 8 at addr ffff88801b370400 by task swapper/0/0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) print_report (mm/kasan/report.c:378 mm/kasan/report.c:488) kasan_report (mm/kasan/report.c:603) inet_csk_reqsk_queue_drop (./include/net/inet_hashtables.h:180 net/ipv4/inet_connection_sock.c:952 net/ipv4/inet_connection_sock.c:966) reqsk_timer_handler (net/ipv4/inet_connection_sock.c:979 net/ipv4/inet_connection_sock.c:1092) call_timer_fn (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/timer.h:127 kernel/time/timer.c:1701) __run_timers.part.0 (kernel/time/timer.c:1752 kernel/time/timer.c:2038) run_timer_softirq (kernel/time/timer.c:2053) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632 kernel/softirq.c:644) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1076 (discriminator 14)) Allocated by task 258 on cpu 0 at 83.612050s: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (mm/kasan/common.c:68) __kasan_slab_alloc (mm/kasan/common.c:343) kmem_cache_alloc (mm/slub.c:3813 mm/slub.c:3860 mm/slub.c:3867) copy_net_ns (./include/linux/slab.h:701 net/core/net_namespace.c:421 net/core/net_namespace.c:480) create_new_namespaces (kernel/nsproxy.c:110) unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) ksys_unshare (kernel/fork.c:3429) __x64_sys_unshare (kernel/fork.c:3496) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) Freed by task 27 on cpu 0 at 329.158864s: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (mm/kasan/common.c:68) kasan_save_free_info (mm/kasan/generic.c:643) __kasan_slab_free (mm/kasan/common.c:265) kmem_cache_free (mm/slub.c:4299 mm/slub.c:4363) cleanup_net (net/core/net_namespace.c:456 net/core/net_namespace.c:446 net/core/net_namespace.c:639) process_one_work (kernel/workqueue.c:2638) worker_thread (kernel/workqueue.c:2700 kernel/workqueue.c:2787) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:250) The buggy address belongs to the object at ffff88801b370000 which belongs to the cache net_namespace of size 4352 The buggy address is located 1024 bytes inside of freed 4352-byte region [ffff88801b370000, ffff88801b371100) [2]: WARNING: CPU: 0 PID: 95 at lib/ref_tracker.c:228 ref_tracker_free (lib/ref_tracker.c:228 (discriminator 1)) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:ref_tracker_free (lib/ref_tracker.c:228 (discriminator 1)) ... Call Trace: __sk_destruct (./include/net/net_namespace.h:353 net/core/sock.c:2204) rcu_core (./arch/x86/include/asm/preempt.h:26 kernel/rcu/tree.c:2165 kernel/rcu/tree.c:2433) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632 kernel/softirq.c:644) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1076 (discriminator 14)) Reported-by: syzkaller Suggested-by: Eric Dumazet Fixes: 467fa15356ac ("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308200122.64357-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_minisocks.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 42844d20da020..b3bfa1a09df68 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -357,10 +357,6 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) /* Even if tw_refcount == 1, we must clean up kernel reqsk */ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); } else if (!purged_once) { - /* The last refcount is decremented in tcp_sk_exit_batch() */ - if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) - continue; - inet_twsk_purge(&tcp_hashinfo, family); purged_once = true; } -- GitLab From ec0e06c797a0582a22fa4eecfe13f29b87b15c17 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Tue, 12 Mar 2024 12:36:22 +0530 Subject: [PATCH 1138/2290] octeontx2-af: Use matching wake_up API variant in CGX command interface [ Upstream commit e642921dfeed1e15e73f78f2c3b6746f72b6deb2 ] Use wake_up API instead of wake_up_interruptible, since wait_event_timeout API is used for waiting on command completion. Fixes: 1463f382f58d ("octeontx2-af: Add support for CGX link management") Signed-off-by: Linu Cherian Signed-off-by: Sunil Goutham Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 90be87dc105d3..e6fe599f7bf3a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1346,7 +1346,7 @@ static irqreturn_t cgx_fwi_event_handler(int irq, void *data) /* Release thread waiting for completion */ lmac->cmd_pend = false; - wake_up_interruptible(&lmac->wq_cmd_cmplt); + wake_up(&lmac->wq_cmd_cmplt); break; case CGX_EVT_ASYNC: if (cgx_event_is_linkevent(event)) -- GitLab From abc9b13fd9217d2eec297c74e5dc6653c16d3ddb Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Wed, 6 Mar 2024 12:31:52 +0100 Subject: [PATCH 1139/2290] s390/vtime: fix average steal time calculation [ Upstream commit 367c50f78451d3bd7ad70bc5c89f9ba6dec46ca9 ] Current average steal timer calculation produces volatile and inflated values. The only user of this value is KVM so far and it uses that to decide whether or not to yield the vCPU which is seeing steal time. KVM compares average steal timer to a threshold and if the threshold is past then it does not allow CPU polling and yields it to host, else it keeps the CPU by polling. Since KVM's steal time threshold is very low by default (%10) it most likely is not effected much by the bloated average steal timer values because the operating region is pretty small. However there might be new users in the future who might rely on this number. Fix average steal timer calculation by changing the formula from: avg_steal_timer = avg_steal_timer / 2 + steal_timer; to the following: avg_steal_timer = (avg_steal_timer + steal_timer) / 2; This ensures that avg_steal_timer is actually a naive average of steal timer values. It now closely follows steal timer values but of course in a smoother manner. Fixes: 152e9b8676c6 ("s390/vtime: steal time exponential moving average") Signed-off-by: Mete Durlu Acked-by: Heiko Carstens Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/vtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9436f3053b88c..003c926a0f4de 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -210,13 +210,13 @@ void vtime_flush(struct task_struct *tsk) virt_timer_expire(); steal = S390_lowcore.steal_timer; - avg_steal = S390_lowcore.avg_steal_timer / 2; + avg_steal = S390_lowcore.avg_steal_timer; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } - S390_lowcore.avg_steal_timer = avg_steal; + S390_lowcore.avg_steal_timer = avg_steal / 2; } static u64 vtime_delta(void) -- GitLab From bd2474a45df7c11412c2587de3d4e43760531418 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Mar 2024 20:46:28 +0000 Subject: [PATCH 1140/2290] net/sched: taprio: proper TCA_TAPRIO_TC_ENTRY_INDEX check [ Upstream commit 343041b59b7810f9cdca371f445dd43b35c740b1 ] taprio_parse_tc_entry() is not correctly checking TCA_TAPRIO_TC_ENTRY_INDEX attribute: int tc; // Signed value tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); if (tc >= TC_QOPT_MAX_QUEUE) { NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); return -ERANGE; } syzbot reported that it could fed arbitary negative values: UBSAN: shift-out-of-bounds in net/sched/sch_taprio.c:1722:18 shift exponent -2147418108 is negative CPU: 0 PID: 5066 Comm: syz-executor367 Not tainted 6.8.0-rc7-syzkaller-00136-gc8a5c731fd12 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_shift_out_of_bounds+0x3c7/0x420 lib/ubsan.c:386 taprio_parse_tc_entry net/sched/sch_taprio.c:1722 [inline] taprio_parse_tc_entries net/sched/sch_taprio.c:1768 [inline] taprio_change+0xb87/0x57d0 net/sched/sch_taprio.c:1877 taprio_init+0x9da/0xc80 net/sched/sch_taprio.c:2134 qdisc_create+0x9d4/0x1190 net/sched/sch_api.c:1355 tc_modify_qdisc+0xa26/0x1e40 net/sched/sch_api.c:1776 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6617 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f1b2dea3759 Code: 48 83 c4 28 c3 e8 d7 19 00 00 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd4de452f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f1b2def0390 RCX: 00007f1b2dea3759 RDX: 0000000000000000 RSI: 00000000200007c0 RDI: 0000000000000004 RBP: 0000000000000003 R08: 0000555500000000 R09: 0000555500000000 R10: 0000555500000000 R11: 0000000000000246 R12: 00007ffd4de45340 R13: 00007ffd4de45310 R14: 0000000000000001 R15: 00007ffd4de45340 Fixes: a54fc09e4cba ("net/sched: taprio: allow user input of per-tc max SDU") Reported-and-tested-by: syzbot+a340daa06412d6028918@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Vladimir Oltean Reviewed-by: Michal Kubiak Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d5eebb2dd1b1..1d4638aa4254f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -765,7 +765,8 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { - [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, + TC_QOPT_MAX_QUEUE), [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, }; -- GitLab From 1ff7ffcac109edb7542a1716756de4e4192d42f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Feb 2024 20:34:36 +0100 Subject: [PATCH 1141/2290] soc: fsl: dpio: fix kcalloc() argument order [ Upstream commit 72ebb41b88f9d7c10c5e159e0507074af0a22fe2 ] A previous bugfix added a call to kcalloc(), which starting in gcc-14 causes a harmless warning about the argument order: drivers/soc/fsl/dpio/dpio-service.c: In function 'dpaa2_io_service_enqueue_multiple_desc_fq': drivers/soc/fsl/dpio/dpio-service.c:526:29: error: 'kcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 526 | ed = kcalloc(sizeof(struct qbman_eq_desc), 32, GFP_KERNEL); | ^~~~~~ drivers/soc/fsl/dpio/dpio-service.c:526:29: note: earlier argument should specify number of elements, later size of each element Since the two are only multiplied, the order does not change the behavior, so just fix it now to shut up the compiler warning. Dmity independently came up with the same fix. Fixes: 5c4a5999b245 ("soc: fsl: dpio: avoid stack usage warning") Reported-by: Dmitry Antipov Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/soc/fsl/dpio/dpio-service.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 1d2b27e3ea63f..b811446e0fa55 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -523,7 +523,7 @@ int dpaa2_io_service_enqueue_multiple_desc_fq(struct dpaa2_io *d, struct qbman_eq_desc *ed; int i, ret; - ed = kcalloc(sizeof(struct qbman_eq_desc), 32, GFP_KERNEL); + ed = kcalloc(32, sizeof(struct qbman_eq_desc), GFP_KERNEL); if (!ed) return -ENOMEM; -- GitLab From 86d9b040421bbd26425f5a3edc226f57ecdecbfe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 8 Mar 2024 12:16:23 -0800 Subject: [PATCH 1142/2290] tcp: Fix refcnt handling in __inet_hash_connect(). [ Upstream commit 04d9d1fc428ac9f581d55118d67e0cb546701feb ] syzbot reported a warning in sk_nulls_del_node_init_rcu(). The commit 66b60b0c8c4a ("dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished().") tried to fix an issue that an unconnected socket occupies an ehash entry when bhash2 allocation fails. In such a case, we need to revert changes done by check_established(), which does not hold refcnt when inserting socket into ehash. So, to revert the change, we need to __sk_nulls_add_node_rcu() instead of sk_nulls_add_node_rcu(). Otherwise, sock_put() will cause refcnt underflow and leak the socket. [0]: WARNING: CPU: 0 PID: 23948 at include/net/sock.h:799 sk_nulls_del_node_init_rcu+0x166/0x1a0 include/net/sock.h:799 Modules linked in: CPU: 0 PID: 23948 Comm: syz-executor.2 Not tainted 6.8.0-rc6-syzkaller-00159-gc055fc00c07b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:sk_nulls_del_node_init_rcu+0x166/0x1a0 include/net/sock.h:799 Code: e8 7f 71 c6 f7 83 fb 02 7c 25 e8 35 6d c6 f7 4d 85 f6 0f 95 c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc e8 1b 6d c6 f7 90 <0f> 0b 90 eb b2 e8 10 6d c6 f7 4c 89 e7 be 04 00 00 00 e8 63 e7 d2 RSP: 0018:ffffc900032d7848 EFLAGS: 00010246 RAX: ffffffff89cd0035 RBX: 0000000000000001 RCX: 0000000000040000 RDX: ffffc90004de1000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 1ffff1100439ac26 R08: ffffffff89ccffe3 R09: 1ffff1100439ac28 R10: dffffc0000000000 R11: ffffed100439ac29 R12: ffff888021cd6140 R13: dffffc0000000000 R14: ffff88802a9bf5c0 R15: ffff888021cd6130 FS: 00007f3b823f16c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3b823f0ff8 CR3: 000000004674a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __inet_hash_connect+0x140f/0x20b0 net/ipv4/inet_hashtables.c:1139 dccp_v6_connect+0xcb9/0x1480 net/dccp/ipv6.c:956 __inet_stream_connect+0x262/0xf30 net/ipv4/af_inet.c:678 inet_stream_connect+0x65/0xa0 net/ipv4/af_inet.c:749 __sys_connect_file net/socket.c:2048 [inline] __sys_connect+0x2df/0x310 net/socket.c:2065 __do_sys_connect net/socket.c:2075 [inline] __se_sys_connect net/socket.c:2072 [inline] __x64_sys_connect+0x7a/0x90 net/socket.c:2072 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f3b8167dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3b823f10c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00007f3b817abf80 RCX: 00007f3b8167dda9 RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000003 RBP: 00007f3b823f1120 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f3b817abf80 R15: 00007ffd3beb57b8 Reported-by: syzbot+12c506c1aae251e70449@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=12c506c1aae251e70449 Fixes: 66b60b0c8c4a ("dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished().") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308201623.65448-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 56776e1b1de52..0ad25e6783ac7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1117,7 +1117,7 @@ error: sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(lock); - sk_nulls_del_node_init_rcu(sk); + __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); sk->sk_hash = 0; -- GitLab From 1ed222ca7396938eb1ab2d034f1ba0d8b00a7122 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 13 Mar 2024 00:27:19 +0900 Subject: [PATCH 1143/2290] hsr: Fix uninit-value access in hsr_get_node() [ Upstream commit ddbec99f58571301679addbc022256970ca3eac6 ] KMSAN reported the following uninit-value access issue [1]: ===================================================== BUG: KMSAN: uninit-value in hsr_get_node+0xa2e/0xa40 net/hsr/hsr_framereg.c:246 hsr_get_node+0xa2e/0xa40 net/hsr/hsr_framereg.c:246 fill_frame_info net/hsr/hsr_forward.c:577 [inline] hsr_forward_skb+0xe12/0x30e0 net/hsr/hsr_forward.c:615 hsr_dev_xmit+0x1a1/0x270 net/hsr/hsr_device.c:223 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3564 __dev_queue_xmit+0x33b8/0x5130 net/core/dev.c:4349 dev_queue_xmit include/linux/netdevice.h:3134 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x8b1d/0x9f30 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6334 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2787 packet_alloc_skb net/packet/af_packet.c:2936 [inline] packet_snd net/packet/af_packet.c:3030 [inline] packet_sendmsg+0x70e8/0x9f30 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 1 PID: 5033 Comm: syz-executor334 Not tainted 6.7.0-syzkaller-00562-g9f8413c4a66f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 ===================================================== If the packet type ID field in the Ethernet header is either ETH_P_PRP or ETH_P_HSR, but it is not followed by an HSR tag, hsr_get_skb_sequence_nr() reads an invalid value as a sequence number. This causes the above issue. This patch fixes the issue by returning NULL if the Ethernet header is not followed by an HSR tag. Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Reported-and-tested-by: syzbot+2ef3a8ce8e91b5a50098@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2ef3a8ce8e91b5a50098 [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20240312152719.724530-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/hsr/hsr_framereg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 0b01998780952..e44a039e36afe 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -235,6 +235,10 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, */ if (ethhdr->h_proto == htons(ETH_P_PRP) || ethhdr->h_proto == htons(ETH_P_HSR)) { + /* Check if skb contains hsr_ethhdr */ + if (skb->mac_len < sizeof(struct hsr_ethhdr)) + return NULL; + /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ -- GitLab From cb8ae8e5ec286b790555e523b60eeadf675cdc64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:27:07 +0100 Subject: [PATCH 1144/2290] nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers [ Upstream commit b794d1c2ad6d7921f2867ce393815ad31b5b5a83 ] The reserved_tags are only needed for fabrics controllers. Right now only fabrics drivers call this helper, so this is harmless, but we'll use it in the PCIe driver soon. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Stable-dep-of: de105068fead ("nvme: fix reconnection fail due to reserved tag allocation") Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0c088db944706..98a8d90feb37d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5029,7 +5029,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; + if (ctrl->ops->flags & NVME_F_FABRICS) + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) -- GitLab From 1883ed12d702fdd0c49fae0351cf0060b15167a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:28:48 +0100 Subject: [PATCH 1145/2290] nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set [ Upstream commit 93b24f579c392bac2e491fee79ad5ce5a131992e ] Add the apple shared tag workaround to nvme_alloc_io_tag_set to prepare for using that helper in the PCIe driver. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Stable-dep-of: de105068fead ("nvme: fix reconnection fail due to reserved tag allocation") Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 98a8d90feb37d..951c8946701aa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5029,7 +5029,13 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - if (ctrl->ops->flags & NVME_F_FABRICS) + /* + * Some Apple controllers requires tags to be unique across admin and + * the (only) I/O queue, so reserve the first 32 tags of the I/O queue. + */ + if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) + set->reserved_tags = NVME_AQ_DEPTH; + else if (ctrl->ops->flags & NVME_F_FABRICS) set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; -- GitLab From 149afee5c7418ec5db9d7387b9c9a5c1eb7ea2a8 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Mon, 11 Mar 2024 10:09:27 +0800 Subject: [PATCH 1146/2290] nvme: fix reconnection fail due to reserved tag allocation [ Upstream commit de105068fead55ed5c07ade75e9c8e7f86a00d1d ] We found a issue on production environment while using NVMe over RDMA, admin_q reconnect failed forever while remote target and network is ok. After dig into it, we found it may caused by a ABBA deadlock due to tag allocation. In my case, the tag was hold by a keep alive request waiting inside admin_q, as we quiesced admin_q while reset ctrl, so the request maked as idle and will not process before reset success. As fabric_q shares tagset with admin_q, while reconnect remote target, we need a tag for connect command, but the only one reserved tag was held by keep alive command which waiting inside admin_q. As a result, we failed to reconnect admin_q forever. In order to fix this issue, I think we should keep two reserved tags for admin queue. Fixes: ed01fee283a0 ("nvme-fabrics: only reserve a single tag") Signed-off-by: Chunguang Xu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 6 ++++-- drivers/nvme/host/fabrics.h | 7 ------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 951c8946701aa..d7516e99275b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4971,7 +4971,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, set->ops = ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect and keep alive */ + set->reserved_tags = 2; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_NO_SCHED; if (ctrl->ops->flags & NVME_F_BLOCKING) @@ -5036,7 +5037,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) set->reserved_tags = NVME_AQ_DEPTH; else if (ctrl->ops->flags & NVME_F_FABRICS) - set->reserved_tags = NVMF_RESERVED_TAGS; + /* Reserved for fabric connect */ + set->reserved_tags = 1; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index dcac3df8a5f76..60c238caf7a97 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -18,13 +18,6 @@ /* default is -1: the fail fast mechanism is disabled */ #define NVMF_DEF_FAIL_FAST_TMO -1 -/* - * Reserved one command for internal usage. This command is used for sending - * the connect command, as well as for the keep alive command on the admin - * queue once live. - */ -#define NVMF_RESERVED_TAGS 1 - /* * Define a host as seen by the target. We allocate one at boot, but also * allow the override it when creating controllers. This is both to provide -- GitLab From 6b62bad2da1b338f452a9380639fc9b093d75a25 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 13 Mar 2024 22:50:18 +0000 Subject: [PATCH 1147/2290] net: mediatek: mtk_eth_soc: clear MAC_MCR_FORCE_LINK only when MAC is up [ Upstream commit f1b85ef15a99f06ed48871ce933d591127d2dcc0 ] Clearing bit MAC_MCR_FORCE_LINK which forces the link down too early can result in MAC ending up in a broken/blocked state. Fix this by handling this bit in the .mac_link_up and .mac_link_down calls instead of in .mac_finish. Fixes: b8fc9f30821e ("net: ethernet: mediatek: Add basic PHYLINK support") Suggested-by: Mason-cw Chang Signed-off-by: Daniel Golle Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 17e6ac4445afc..fecf3dd22dfaa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -561,8 +561,7 @@ static int mtk_mac_finish(struct phylink_config *config, unsigned int mode, mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | - MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK | - MAC_MCR_RX_FIFO_CLR_DIS; + MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_RX_FIFO_CLR_DIS; /* Only update control register when needed! */ if (mcr_new != mcr_cur) @@ -610,7 +609,7 @@ static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode, phylink_config); u32 mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); - mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN); + mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK); mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } @@ -649,7 +648,7 @@ static void mtk_mac_link_up(struct phylink_config *config, if (rx_pause) mcr |= MAC_MCR_FORCE_RX_FC; - mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN; + mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK; mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } -- GitLab From f78807362828ad01db2a9ed005bf79501b620f27 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 13 Mar 2024 22:50:40 +0000 Subject: [PATCH 1148/2290] net: ethernet: mtk_eth_soc: fix PPE hanging issue [ Upstream commit ea80e3ed09ab2c2b75724faf5484721753e92c31 ] A patch to resolve an issue was found in MediaTek's GPL-licensed SDK: In the mtk_ppe_stop() function, the PPE scan mode is not disabled before disabling the PPE. This can potentially lead to a hang during the process of disabling the PPE. Without this patch, the PPE may experience a hang during the reboot test. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/b40da332dfe763932a82f9f62a4709457a15dd6c Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") Suggested-by: Bc-bocun Chen Signed-off-by: Daniel Golle Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_ppe.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index d6eed204574a9..c64211e22ae70 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -811,7 +811,7 @@ void mtk_ppe_start(struct mtk_ppe *ppe) MTK_PPE_KEEPALIVE_DISABLE) | FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) | FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, - MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) | + MTK_PPE_SCAN_MODE_CHECK_AGE) | FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM, MTK_PPE_ENTRIES_SHIFT); if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2)) @@ -895,17 +895,21 @@ int mtk_ppe_stop(struct mtk_ppe *ppe) mtk_ppe_cache_enable(ppe, false); - /* disable offload engine */ - ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); - ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); - /* disable aging */ val = MTK_PPE_TB_CFG_AGE_NON_L4 | MTK_PPE_TB_CFG_AGE_UNBIND | MTK_PPE_TB_CFG_AGE_TCP | MTK_PPE_TB_CFG_AGE_UDP | - MTK_PPE_TB_CFG_AGE_TCP_FIN; + MTK_PPE_TB_CFG_AGE_TCP_FIN | + MTK_PPE_TB_CFG_SCAN_MODE; ppe_clear(ppe, MTK_PPE_TB_CFG, val); - return mtk_ppe_wait_busy(ppe); + if (mtk_ppe_wait_busy(ppe)) + return -ETIMEDOUT; + + /* disable offload engine */ + ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); + ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); + + return 0; } -- GitLab From ef7eed7e11d23337310ecc2c014ecaeea52719c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2024 14:18:16 +0000 Subject: [PATCH 1149/2290] packet: annotate data-races around ignore_outgoing [ Upstream commit 6ebfad33161afacb3e1e59ed1c2feefef70f9f97 ] ignore_outgoing is read locklessly from dev_queue_xmit_nit() and packet_getsockopt() Add appropriate READ_ONCE()/WRITE_ONCE() annotations. syzbot reported: BUG: KCSAN: data-race in dev_queue_xmit_nit / packet_setsockopt write to 0xffff888107804542 of 1 bytes by task 22618 on cpu 0: packet_setsockopt+0xd83/0xfd0 net/packet/af_packet.c:4003 do_sock_setsockopt net/socket.c:2311 [inline] __sys_setsockopt+0x1d8/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2340 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff888107804542 of 1 bytes by task 27 on cpu 1: dev_queue_xmit_nit+0x82/0x620 net/core/dev.c:2248 xmit_one net/core/dev.c:3527 [inline] dev_hard_start_xmit+0xcc/0x3f0 net/core/dev.c:3547 __dev_queue_xmit+0xf24/0x1dd0 net/core/dev.c:4335 dev_queue_xmit include/linux/netdevice.h:3091 [inline] batadv_send_skb_packet+0x264/0x300 net/batman-adv/send.c:108 batadv_send_broadcast_skb+0x24/0x30 net/batman-adv/send.c:127 batadv_iv_ogm_send_to_if net/batman-adv/bat_iv_ogm.c:392 [inline] batadv_iv_ogm_emit net/batman-adv/bat_iv_ogm.c:420 [inline] batadv_iv_send_outstanding_bat_ogm_packet+0x3f0/0x4b0 net/batman-adv/bat_iv_ogm.c:1700 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0x465/0x990 kernel/workqueue.c:3335 worker_thread+0x526/0x730 kernel/workqueue.c:3416 kthread+0x1d1/0x210 kernel/kthread.c:388 ret_from_fork+0x4b/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 value changed: 0x00 -> 0x01 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 27 Comm: kworker/u8:1 Tainted: G W 6.8.0-syzkaller-08073-g480e035fc4c7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet Fixes: fa788d986a3a ("packet: add sockopt to ignore outgoing packets") Reported-by: syzbot+c669c1136495a2e7c31f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/CANn89i+Z7MfbkBLOv=p7KZ7=K1rKHO4P1OL5LYDCtBiyqsa9oQ@mail.gmail.com/T/#t Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 2 +- net/packet/af_packet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 60619fe8af5fc..9a48a7e26cf46 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2271,7 +2271,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { - if (ptype->ignore_outgoing) + if (READ_ONCE(ptype->ignore_outgoing)) continue; /* Never send packets back to the socket diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c3117350f5fbb..7188ca8d84693 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3981,7 +3981,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (val < 0 || val > 1) return -EINVAL; - po->prot_hook.ignore_outgoing = !!val; + WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: @@ -4110,7 +4110,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_IGNORE_OUTGOING: - val = po->prot_hook.ignore_outgoing; + val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) -- GitLab From d343a618bc3c90de6266efb946195cbd63ea705b Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 13 Mar 2024 19:37:58 +0100 Subject: [PATCH 1150/2290] net: veth: do not manipulate GRO when using XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d7db7775ea2e31502d46427f5efd385afc4ff1eb ] Commit d3256efd8e8b ("veth: allow enabling NAPI even without XDP") tried to fix the fact that GRO was not possible without XDP, because veth did not use NAPI without XDP. However, it also introduced the behaviour that GRO is always enabled, when XDP is enabled. While it might be desired for most cases, it is confusing for the user at best as the GRO flag suddenly changes, when an XDP program is attached. It also introduces some complexities in state management as was partially addressed in commit fe9f801355f0 ("net: veth: clear GRO when clearing XDP even when down"). But the biggest problem is that it is not possible to disable GRO at all, when an XDP program is attached, which might be needed for some use cases. Fix this by not touching the GRO flag on XDP enable/disable as the code already supports switching to NAPI if either GRO or XDP is requested. Link: https://lore.kernel.org/lkml/20240311124015.38106-1-ignat@cloudflare.com/ Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Fixes: fe9f801355f0 ("net: veth: clear GRO when clearing XDP even when down") Signed-off-by: Ignat Korchagin Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/veth.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index dd9f5f1461921..8dcd3b6e143b9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1444,8 +1444,6 @@ static netdev_features_t veth_fix_features(struct net_device *dev, if (peer_priv->_xdp_prog) features &= ~NETIF_F_GSO_SOFTWARE; } - if (priv->_xdp_prog) - features |= NETIF_F_GRO; return features; } @@ -1542,14 +1540,6 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding - * XDP is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } - peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1560,14 +1550,6 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; -- GitLab From be4512b9ac6fc53e1ca8daccbda84f643215c547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 12:28:35 +0300 Subject: [PATCH 1151/2290] net: dsa: mt7530: prevent possible incorrect XTAL frequency selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f490c492e946d8ffbe65ad4efc66de3c5ede30a4 ] On MT7530, the HT_XTAL_FSEL field of the HWTRAP register stores a 2-bit value that represents the frequency of the crystal oscillator connected to the switch IC. The field is populated by the state of the ESW_P4_LED_0 and ESW_P4_LED_0 pins, which is done right after reset is deasserted. ESW_P4_LED_0 ESW_P3_LED_0 Frequency ----------------------------------------- 0 0 Reserved 0 1 20MHz 1 0 40MHz 1 1 25MHz On MT7531, the XTAL25 bit of the STRAP register stores this. The LAN0LED0 pin is used to populate the bit. 25MHz when the pin is high, 40MHz when it's low. These pins are also used with LEDs, therefore, their state can be set to something other than the bootstrapping configuration. For example, a link may be established on port 3 before the DSA subdriver takes control of the switch which would set ESW_P3_LED_0 to high. Currently on mt7530_setup() and mt7531_setup(), 1000 - 1100 usec delay is described between reset assertion and deassertion. Some switch ICs in real life conditions cannot always have these pins set back to the bootstrapping configuration before reset deassertion in this amount of delay. This causes wrong crystal frequency to be selected which puts the switch in a nonfunctional state after reset deassertion. The tests below are conducted on an MT7530 with a 40MHz crystal oscillator by Justin Swartz. With a cable from an active peer connected to port 3 before reset, an incorrect crystal frequency (0b11 = 25MHz) is selected: [1] [3] [5] : : : _____________________________ __________________ ESW_P4_LED_0 |_______| _____________________________ ESW_P3_LED_0 |__________________________ : : : : : : [4]...: : : [2]................: [1] Reset is asserted. [2] Period of 1000 - 1100 usec. [3] Reset is deasserted. [4] Period of 315 usec. HWTRAP register is populated with incorrect XTAL frequency. [5] Signals reflect the bootstrapped configuration. Increase the delay between reset_control_assert() and reset_control_deassert(), and gpiod_set_value_cansleep(priv->reset, 0) and gpiod_set_value_cansleep(priv->reset, 1) to 5000 - 5100 usec. This amount ensures a higher possibility that the switch IC will have these pins back to the bootstrapping configuration before reset deassertion. With a cable from an active peer connected to port 3 before reset, the correct crystal frequency (0b10 = 40MHz) is selected: [1] [2-1] [3] [5] : : : : _____________________________ __________________ ESW_P4_LED_0 |_______| ___________________ _______ ESW_P3_LED_0 |_________| |__________________ : : : : : : [2-2]...: [4]...: [2]................: [1] Reset is asserted. [2] Period of 5000 - 5100 usec. [2-1] ESW_P3_LED_0 goes low. [2-2] Remaining period of 5000 - 5100 usec. [3] Reset is deasserted. [4] Period of 310 usec. HWTRAP register is populated with bootstrapped XTAL frequency. [5] Signals reflect the bootstrapped configuration. ESW_P3_LED_0 low period before reset deassertion: 5000 usec - 5100 usec TEST RESET HOLD # (usec) --------------------- 1 5410 2 5440 3 4375 4 5490 5 5475 6 4335 7 4370 8 5435 9 4205 10 4335 11 3750 12 3170 13 4395 14 4375 15 3515 16 4335 17 4220 18 4175 19 4175 20 4350 Min 3170 Max 5490 Median 4342.500 Avg 4466.500 Revert commit 2920dd92b980 ("net: dsa: mt7530: disable LEDs before reset"). Changing the state of pins via reset assertion is simpler and more efficient than doing so by setting the LED controller off. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Co-developed-by: Justin Swartz Signed-off-by: Justin Swartz Signed-off-by: Arınç ÜNAL Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b988c8a40d536..80b346d4d990f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2187,11 +2187,11 @@ mt7530_setup(struct dsa_switch *ds) */ if (priv->mcm) { reset_control_assert(priv->rstc); - usleep_range(1000, 1100); + usleep_range(5000, 5100); reset_control_deassert(priv->rstc); } else { gpiod_set_value_cansleep(priv->reset, 0); - usleep_range(1000, 1100); + usleep_range(5000, 5100); gpiod_set_value_cansleep(priv->reset, 1); } @@ -2401,11 +2401,11 @@ mt7531_setup(struct dsa_switch *ds) */ if (priv->mcm) { reset_control_assert(priv->rstc); - usleep_range(1000, 1100); + usleep_range(5000, 5100); reset_control_deassert(priv->rstc); } else { gpiod_set_value_cansleep(priv->reset, 0); - usleep_range(1000, 1100); + usleep_range(5000, 5100); gpiod_set_value_cansleep(priv->reset, 1); } -- GitLab From 1d830032bcbace99beb25ed4323ee84079a9a806 Mon Sep 17 00:00:00 2001 From: Arthur Grillo Date: Sat, 16 Mar 2024 13:25:20 -0300 Subject: [PATCH 1152/2290] drm: Fix drm_fixp2int_round() making it add 0.5 [ Upstream commit 807f96abdf14c80f534c78f2d854c2590963345c ] As well noted by Pekka[1], the rounding of drm_fixp2int_round is wrong. To round a number, you need to add 0.5 to the number and floor that, drm_fixp2int_round() is adding 0.0000076. Make it add 0.5. [1]: https://lore.kernel.org/all/20240301135327.22efe0dd.pekka.paalanen@collabora.com/ Fixes: 8b25320887d7 ("drm: Add fixed-point helper to get rounded integer values") Suggested-by: Pekka Paalanen Reviewed-by: Harry Wentland Reviewed-by: Melissa Wen Signed-off-by: Arthur Grillo Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/20240316-drm_fixed-v2-1-c1bc2665b5ed@riseup.net Signed-off-by: Sasha Levin --- include/drm/drm_fixed.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 6230088428cdb..a476a406e5997 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -70,7 +70,6 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) } #define DRM_FIXED_POINT 32 -#define DRM_FIXED_POINT_HALF 16 #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) @@ -89,7 +88,7 @@ static inline int drm_fixp2int(s64 a) static inline int drm_fixp2int_round(s64 a) { - return drm_fixp2int(a + (1 << (DRM_FIXED_POINT_HALF - 1))); + return drm_fixp2int(a + DRM_FIXED_ONE / 2); } static inline int drm_fixp2int_ceil(s64 a) -- GitLab From 80fc9b9c626b7be00b5fb9b0aa025901e1488ea7 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 9 Feb 2024 14:30:07 -0800 Subject: [PATCH 1153/2290] vdpa_sim: reset must not run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9588e7fc511f9c55b9835f14916e90ab940061b7 ] vdpasim_do_reset sets running to true, which is wrong, as it allows vdpasim_kick_vq to post work requests before the device has been configured. To fix, do not set running until VIRTIO_CONFIG_S_DRIVER_OK is set. Fixes: 0c89e2a3a9d0 ("vdpa_sim: Implement suspend vdpa op") Signed-off-by: Steve Sistare Reviewed-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <1707517807-137331-1-git-send-email-steven.sistare@oracle.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 61bde476cf9c8..e7fc25bfdd237 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -120,7 +120,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) for (i = 0; i < vdpasim->dev_attr.nas; i++) vhost_iotlb_reset(&vdpasim->iommu[i]); - vdpasim->running = true; + vdpasim->running = false; spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; @@ -513,6 +513,7 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) spin_lock(&vdpasim->lock); vdpasim->status = status; + vdpasim->running = (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0; spin_unlock(&vdpasim->lock); } -- GitLab From 07b6891ca62ebd532f4d4d4d0eed76ab607c2334 Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 16 Feb 2024 09:25:02 -0500 Subject: [PATCH 1154/2290] vdpa/mlx5: Allow CVQ size changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 749a4016839270163efc36ecddddd01de491a16b ] The MLX driver was not updating its control virtqueue size at set_vq_num and instead always initialized to MLX5_CVQ_MAX_ENT (16) at setup_cvq_vring. Qemu would try to set the size to 64 by default, however, because the CVQ size always was initialized to 16, an error would be thrown when sending >16 control messages (as used-ring entry 17 is initialized to 0). For example, starting a guest with x-svq=on and then executing the following command would produce the error below: # for i in {1..20}; do ifconfig eth0 hw ether XX:xx:XX:xx:XX:XX; done qemu-system-x86_64: Insufficient written data (0) [ 435.331223] virtio_net virtio0: Failed to set mac address by vq command. SIOCSIFHWADDR: Invalid argument Acked-by: Dragos Tatulea Acked-by: Eugenio Pérez Signed-off-by: Jonah Palmer Message-Id: <20240216142502.78095-1-jonah.palmer@oracle.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2b7e796c48897..74d295312466f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -185,8 +185,6 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev); static bool mlx5_vdpa_debug; -#define MLX5_CVQ_MAX_ENT 16 - #define MLX5_LOG_VIO_FLAG(_feature) \ do { \ if (features & BIT_ULL(_feature)) \ @@ -1980,9 +1978,16 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + if (!is_index_valid(mvdev, idx)) return; + if (is_ctrl_vq_idx(mvdev, idx)) { + struct mlx5_control_vq *cvq = &mvdev->cvq; + + cvq->vring.vring.num = num; + return; + } + mvq = &ndev->vqs[idx]; mvq->num_ent = num; } @@ -2512,7 +2517,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) u16 idx = cvq->vring.last_avail_idx; err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, + cvq->vring.vring.num, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); -- GitLab From 45a83b220c83e3c326513269afbf69ae6fc65cce Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 14 Mar 2024 16:49:06 -0600 Subject: [PATCH 1155/2290] wireguard: receive: annotate data-race around receiving_counter.counter [ Upstream commit bba045dc4d996d03dce6fe45726e78a1a1f6d4c3 ] Syzkaller with KCSAN identified a data-race issue when accessing keypair->receiving_counter.counter. Use READ_ONCE() and WRITE_ONCE() annotations to mark the data race as intentional. BUG: KCSAN: data-race in wg_packet_decrypt_worker / wg_packet_rx_poll write to 0xffff888107765888 of 8 bytes by interrupt on cpu 0: counter_validate drivers/net/wireguard/receive.c:321 [inline] wg_packet_rx_poll+0x3ac/0xf00 drivers/net/wireguard/receive.c:461 __napi_poll+0x60/0x3b0 net/core/dev.c:6536 napi_poll net/core/dev.c:6605 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6738 __do_softirq+0xc4/0x279 kernel/softirq.c:553 do_softirq+0x5e/0x90 kernel/softirq.c:454 __local_bh_enable_ip+0x64/0x70 kernel/softirq.c:381 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline] _raw_spin_unlock_bh+0x36/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:396 [inline] ptr_ring_consume_bh include/linux/ptr_ring.h:367 [inline] wg_packet_decrypt_worker+0x6c5/0x700 drivers/net/wireguard/receive.c:499 process_one_work kernel/workqueue.c:2633 [inline] ... read to 0xffff888107765888 of 8 bytes by task 3196 on cpu 1: decrypt_packet drivers/net/wireguard/receive.c:252 [inline] wg_packet_decrypt_worker+0x220/0x700 drivers/net/wireguard/receive.c:501 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2706 worker_thread+0x525/0x730 kernel/workqueue.c:2787 ... Fixes: a9e90d9931f3 ("wireguard: noise: separate receive counter from send counter") Reported-by: syzbot+d1de830e4ecdaac83d89@syzkaller.appspotmail.com Signed-off-by: Nikita Zhandarovich Signed-off-by: Jason A. Donenfeld Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wireguard/receive.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index a176653c88616..db01ec03bda00 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -251,7 +251,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || - keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { + READ_ONCE(keypair->receiving_counter.counter) >= REJECT_AFTER_MESSAGES)) { WRITE_ONCE(keypair->receiving.is_valid, false); return false; } @@ -318,7 +318,7 @@ static bool counter_validate(struct noise_replay_counter *counter, u64 their_cou for (i = 1; i <= top; ++i) counter->backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; - counter->counter = their_counter; + WRITE_ONCE(counter->counter, their_counter); } index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; @@ -463,7 +463,7 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", peer->device->dev->name, PACKET_CB(skb)->nonce, - keypair->receiving_counter.counter); + READ_ONCE(keypair->receiving_counter.counter)); goto next; } -- GitLab From 52287ed416a10bc3d3e204a3186d9509ab3ee634 Mon Sep 17 00:00:00 2001 From: Yewon Choi Date: Fri, 15 Mar 2024 18:28:38 +0900 Subject: [PATCH 1156/2290] rds: introduce acquire/release ordering in acquire/release_in_xmit() [ Upstream commit 1422f28826d2a0c11e5240b3e951c9e214d8656e ] acquire/release_in_xmit() work as bit lock in rds_send_xmit(), so they are expected to ensure acquire/release memory ordering semantics. However, test_and_set_bit/clear_bit() don't imply such semantics, on top of this, following smp_mb__after_atomic() does not guarantee release ordering (memory barrier actually should be placed before clear_bit()). Instead, we use clear_bit_unlock/test_and_set_bit_lock() here. Fixes: 0f4b1c7e89e6 ("rds: fix rds_send_xmit() serialization") Fixes: 1f9ecd7eacfd ("RDS: Pass rds_conn_path to rds_send_xmit()") Signed-off-by: Yewon Choi Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/ZfQUxnNTO9AJmzwc@libra05 Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/rds/send.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/rds/send.c b/net/rds/send.c index a4ba45c430d81..0005fb43f2dfa 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -103,13 +103,12 @@ EXPORT_SYMBOL_GPL(rds_send_path_reset); static int acquire_in_xmit(struct rds_conn_path *cp) { - return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0; + return test_and_set_bit_lock(RDS_IN_XMIT, &cp->cp_flags) == 0; } static void release_in_xmit(struct rds_conn_path *cp) { - clear_bit(RDS_IN_XMIT, &cp->cp_flags); - smp_mb__after_atomic(); + clear_bit_unlock(RDS_IN_XMIT, &cp->cp_flags); /* * We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk -- GitLab From 87ca3d940f648bfe49d254fd6bc0c42b552e4af4 Mon Sep 17 00:00:00 2001 From: Felix Maurer Date: Fri, 15 Mar 2024 13:04:52 +0100 Subject: [PATCH 1157/2290] hsr: Handle failures in module init [ Upstream commit 3cf28cd492308e5f63ed00b29ea03ca016264376 ] A failure during registration of the netdev notifier was not handled at all. A failure during netlink initialization did not unregister the netdev notifier. Handle failures of netdev notifier registration and netlink initialization. Both functions should only return negative values on failure and thereby lead to the hsr module not being loaded. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Felix Maurer Reviewed-by: Shigeru Yoshida Reviewed-by: Breno Leitao Link: https://lore.kernel.org/r/3ce097c15e3f7ace98fc7fd9bcbf299f092e63d1.1710504184.git.fmaurer@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/hsr/hsr_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b099c31501509..257b50124cee5 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -148,14 +148,21 @@ static struct notifier_block hsr_nb = { static int __init hsr_init(void) { - int res; + int err; BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN); - register_netdevice_notifier(&hsr_nb); - res = hsr_netlink_init(); + err = register_netdevice_notifier(&hsr_nb); + if (err) + return err; + + err = hsr_netlink_init(); + if (err) { + unregister_netdevice_notifier(&hsr_nb); + return err; + } - return res; + return 0; } static void __exit hsr_exit(void) -- GitLab From 6af7c8a2980b859b62acdb47a4d1098e467a9fca Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Fri, 15 Mar 2024 15:35:40 +0100 Subject: [PATCH 1158/2290] ipv4: raw: Fix sending packets from raw sockets via IPsec tunnels [ Upstream commit c9b3b81716c5b92132a6c1d4ac3c48a7b44082ab ] Since the referenced commit, the xfrm_inner_extract_output() function uses the protocol field to determine the address family. So not setting it for IPv4 raw sockets meant that such packets couldn't be tunneled via IPsec anymore. IPv6 raw sockets are not affected as they already set the protocol since 9c9c9ad5fae7 ("ipv6: set skb->protocol on tcp, raw and ip6_append_data genereated skbs"). Fixes: f4796398f21b ("xfrm: Remove inner/outer modes from output path") Signed-off-by: Tobias Brunner Reviewed-by: David Ahern Reviewed-by: Nicolas Dichtel Link: https://lore.kernel.org/r/c5d9a947-eb19-4164-ac99-468ea814ce20@strongswan.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/raw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7c63b91edbf7a..ee0efd0efec40 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -348,6 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, goto error; skb_reserve(skb, hlen); + skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; -- GitLab From b29a5055eeb0878b0a1fd8afce55795c282e06da Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Fri, 15 Mar 2024 20:50:52 +0300 Subject: [PATCH 1159/2290] net: phy: fix phy_read_poll_timeout argument type in genphy_loopback [ Upstream commit 32fa4366cc4da1c97b725a0066adf43c6b298f37 ] read_poll_timeout inside phy_read_poll_timeout can set val negative in some cases (for example, __mdiobus_read inside phy_read can return -EOPNOTSUPP). Supposedly, commit 4ec732951702 ("net: phylib: fix phy_read*_poll_timeout()") should fix problems with wrong-signed vals, but I do not see how as val is sent to phy_read as is and __val = phy_read (not val) is checked for sign. Change val type for signed to allow better error handling as done in other phy_read_poll_timeout callers. This will not fix any error handling by itself, but allows, for example, to modify cond with appropriate sign check or check resulting val separately. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 014068dcb5b1 ("net: phy: genphy_loopback: add link speed configuration") Signed-off-by: Nikita Kiryushin Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20240315175052.8049-1-kiryushin@ancud.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 45b07004669d6..f25b0d338ca8d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2640,8 +2640,8 @@ EXPORT_SYMBOL(genphy_resume); int genphy_loopback(struct phy_device *phydev, bool enable) { if (enable) { - u16 val, ctl = BMCR_LOOPBACK; - int ret; + u16 ctl = BMCR_LOOPBACK; + int ret, val; ctl |= mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); -- GitLab From 20e21c3c0195d915f33bc7321ee6b362177bf5bf Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 18 Mar 2024 18:35:06 +0100 Subject: [PATCH 1160/2290] dm-integrity: fix a memory leak when rechecking the data [ Upstream commit 55e565c42dce81a4e49c13262d5bc4eb4c2e588a ] Memory for the "checksums" pointer will leak if the data is rechecked after checksum failure (because the associated kfree won't happen due to 'goto skip_io'). Fix this by freeing the checksums memory before recheck, and just use the "checksum_onstack" memory for storing checksum during recheck. Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3da4359f51645..e1bf91faa462b 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1856,12 +1856,12 @@ again: r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { + if (likely(checksums != checksums_onstack)) + kfree(checksums); if (r > 0) { - integrity_recheck(dio, checksums); + integrity_recheck(dio, checksums_onstack); goto skip_io; } - if (likely(checksums != checksums_onstack)) - kfree(checksums); goto error; } -- GitLab From cf7d8cba639ae792a42c2a137b495eac262ac36c Mon Sep 17 00:00:00 2001 From: Thinh Tran Date: Fri, 15 Mar 2024 15:55:35 -0500 Subject: [PATCH 1161/2290] net/bnx2x: Prevent access to a freed page in page_pool [ Upstream commit d27e2da94a42655861ca4baea30c8cd65546f25d ] Fix race condition leading to system crash during EEH error handling During EEH error recovery, the bnx2x driver's transmit timeout logic could cause a race condition when handling reset tasks. The bnx2x_tx_timeout() schedules reset tasks via bnx2x_sp_rtnl_task(), which ultimately leads to bnx2x_nic_unload(). In bnx2x_nic_unload() SGEs are freed using bnx2x_free_rx_sge_range(). However, this could overlap with the EEH driver's attempt to reset the device using bnx2x_io_slot_reset(), which also tries to free SGEs. This race condition can result in system crashes due to accessing freed memory locations in bnx2x_free_rx_sge() 799 static inline void bnx2x_free_rx_sge(struct bnx2x *bp, 800 struct bnx2x_fastpath *fp, u16 index) 801 { 802 struct sw_rx_page *sw_buf = &fp->rx_page_ring[index]; 803 struct page *page = sw_buf->page; .... where sw_buf was set to NULL after the call to dma_unmap_page() by the preceding thread. EEH: Beginning: 'slot_reset' PCI 0011:01:00.0#10000: EEH: Invoking bnx2x->slot_reset() bnx2x: [bnx2x_io_slot_reset:14228(eth1)]IO slot reset initializing... bnx2x 0011:01:00.0: enabling device (0140 -> 0142) bnx2x: [bnx2x_io_slot_reset:14244(eth1)]IO slot reset --> driver unload Kernel attempted to read user page (0) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc0080000025065fc Oops: Kernel access of bad area, sig: 11 [#1] ..... Call Trace: [c000000003c67a20] [c00800000250658c] bnx2x_io_slot_reset+0x204/0x610 [bnx2x] (unreliable) [c000000003c67af0] [c0000000000518a8] eeh_report_reset+0xb8/0xf0 [c000000003c67b60] [c000000000052130] eeh_pe_report+0x180/0x550 [c000000003c67c70] [c00000000005318c] eeh_handle_normal_event+0x84c/0xa60 [c000000003c67d50] [c000000000053a84] eeh_event_handler+0xf4/0x170 [c000000003c67da0] [c000000000194c58] kthread+0x1c8/0x1d0 [c000000003c67e10] [c00000000000cf64] ret_from_kernel_thread+0x5c/0x64 To solve this issue, we need to verify page pool allocations before freeing. Fixes: 4cace675d687 ("bnx2x: Alloc 4k fragment for each rx ring buffer element") Signed-off-by: Thinh Tran Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240315205535.1321-1-thinhtr@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index d8b1824c334d3..0bc1367fd6492 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -1002,9 +1002,6 @@ static inline void bnx2x_set_fw_mac_addr(__le16 *fw_hi, __le16 *fw_mid, static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp, struct bnx2x_alloc_pool *pool) { - if (!pool->page) - return; - put_page(pool->page); pool->page = NULL; @@ -1015,6 +1012,9 @@ static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp, { int i; + if (!fp->page_pool.page) + return; + if (fp->mode == TPA_MODE_DISABLED) return; -- GitLab From ac3f337f0a2ee559514d6c831890c2f642f80c27 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:48 +0530 Subject: [PATCH 1162/2290] octeontx2-af: recover CPT engine when it gets fault [ Upstream commit 07ea567d84cdf0add274d66db7c02b55b818d517 ] When CPT engine has uncorrectable errors, it will get halted and must be disabled and re-enabled. This patch adds code for the same. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 110 +++++++++++++----- 1 file changed, 80 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 38bbae5d9ae05..1ed16ce515bb1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -37,34 +37,60 @@ (_rsp)->free_sts_##etype = free_sts; \ }) -static irqreturn_t rvu_cpt_af_flt_intr_handler(int irq, void *ptr) +static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) { struct rvu_block *block = ptr; struct rvu *rvu = block->rvu; int blkaddr = block->addr; - u64 reg0, reg1, reg2; - - reg0 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(0)); - reg1 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(1)); - if (!is_rvu_otx2(rvu)) { - reg2 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(2)); - dev_err_ratelimited(rvu->dev, - "Received CPTAF FLT irq : 0x%llx, 0x%llx, 0x%llx", - reg0, reg1, reg2); - } else { - dev_err_ratelimited(rvu->dev, - "Received CPTAF FLT irq : 0x%llx, 0x%llx", - reg0, reg1); + u64 reg, val; + int i, eng; + u8 grp; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(vec)); + dev_err_ratelimited(rvu->dev, "Received CPTAF FLT%d irq : 0x%llx", vec, reg); + + i = -1; + while ((i = find_next_bit((unsigned long *)®, 64, i + 1)) < 64) { + switch (vec) { + case 0: + eng = i; + break; + case 1: + eng = i + 64; + break; + case 2: + eng = i + 128; + break; + } + grp = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng)) & 0xFF; + /* Disable and enable the engine which triggers fault */ + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), 0x0); + val = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng)); + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val & ~1ULL); + + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), grp); + rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val | 1ULL); } - - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(0), reg0); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(1), reg1); - if (!is_rvu_otx2(rvu)) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(2), reg2); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(vec), reg); return IRQ_HANDLED; } +static irqreturn_t rvu_cpt_af_flt0_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_AF_INT_VEC_FLT0, ptr); +} + +static irqreturn_t rvu_cpt_af_flt1_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_AF_INT_VEC_FLT1, ptr); +} + +static irqreturn_t rvu_cpt_af_flt2_intr_handler(int irq, void *ptr) +{ + return cpt_af_flt_intr_handler(CPT_10K_AF_INT_VEC_FLT2, ptr); +} + static irqreturn_t rvu_cpt_af_rvu_intr_handler(int irq, void *ptr) { struct rvu_block *block = ptr; @@ -119,8 +145,10 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) int i; /* Disable all CPT AF interrupts */ - for (i = 0; i < CPT_10K_AF_INT_VEC_RVU; i++) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF); + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); @@ -151,7 +179,7 @@ static void cpt_unregister_interrupts(struct rvu *rvu, int blkaddr) /* Disable all CPT AF interrupts */ for (i = 0; i < CPT_AF_INT_VEC_RVU; i++) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), ~0ULL); rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); @@ -172,16 +200,31 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; + irq_handler_t flt_fn; int i, ret; for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); + + switch (i) { + case CPT_10K_AF_INT_VEC_FLT0: + flt_fn = rvu_cpt_af_flt0_intr_handler; + break; + case CPT_10K_AF_INT_VEC_FLT1: + flt_fn = rvu_cpt_af_flt1_intr_handler; + break; + case CPT_10K_AF_INT_VEC_FLT2: + flt_fn = rvu_cpt_af_flt2_intr_handler; + break; + } ret = rvu_cpt_do_register_interrupt(block, off + i, - rvu_cpt_af_flt_intr_handler, - &rvu->irq_name[(off + i) * NAME_SIZE]); + flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + if (i == CPT_10K_AF_INT_VEC_FLT2) + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF); + else + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); } ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, @@ -208,8 +251,8 @@ static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; struct rvu_block *block; + irq_handler_t flt_fn; int i, offs, ret = 0; - char irq_name[16]; if (!is_block_implemented(rvu->hw, blkaddr)) return 0; @@ -226,13 +269,20 @@ static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) return cpt_10k_register_interrupts(block, offs); for (i = CPT_AF_INT_VEC_FLT0; i < CPT_AF_INT_VEC_RVU; i++) { - snprintf(irq_name, sizeof(irq_name), "CPTAF FLT%d", i); + sprintf(&rvu->irq_name[(offs + i) * NAME_SIZE], "CPTAF FLT%d", i); + switch (i) { + case CPT_AF_INT_VEC_FLT0: + flt_fn = rvu_cpt_af_flt0_intr_handler; + break; + case CPT_AF_INT_VEC_FLT1: + flt_fn = rvu_cpt_af_flt1_intr_handler; + break; + } ret = rvu_cpt_do_register_interrupt(block, offs + i, - rvu_cpt_af_flt_intr_handler, - irq_name); + flt_fn, &rvu->irq_name[(offs + i) * NAME_SIZE]); if (ret) goto err; - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); } ret = rvu_cpt_do_register_interrupt(block, offs + CPT_AF_INT_VEC_RVU, -- GitLab From 35d8af38f1993678321ecf5c6f99e128ea3dd944 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:49 +0530 Subject: [PATCH 1163/2290] octeontx2-af: add mbox for CPT LF reset [ Upstream commit f58cf765e8f5f4860ea094aa12c156d9195a4c28 ] On OcteonTX2 SoC, the admin function (AF) is the only one with all priviliges to configure HW and alloc resources, PFs and it's VFs have to request AF via mailbox for all their needs. This patch adds a new mailbox for CPT VFs to request for CPT LF reset. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 8 +++++ .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 33 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 03ebabd616353..5decd1919de03 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -196,6 +196,7 @@ M(CPT_STATS, 0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp) \ M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ msg_rsp) \ M(CPT_CTX_CACHE_SYNC, 0xA07, cpt_ctx_cache_sync, msg_req, msg_rsp) \ +M(CPT_LF_RESET, 0xA08, cpt_lf_reset, cpt_lf_rst_req, msg_rsp) \ /* SDP mbox IDs (range 0x1000 - 0x11FF) */ \ M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \ M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \ @@ -1702,6 +1703,13 @@ struct cpt_inst_lmtst_req { u64 rsvd; }; +/* Mailbox message format to request for CPT LF reset */ +struct cpt_lf_rst_req { + struct mbox_msghdr hdr; + u32 slot; + u32 rsvd; +}; + struct sdp_node_info { /* Node to which this PF belons to */ u8 node_id; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 1ed16ce515bb1..1cd34914cb86b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -851,6 +851,39 @@ int rvu_mbox_handler_cpt_ctx_cache_sync(struct rvu *rvu, struct msg_req *req, return rvu_cpt_ctx_flush(rvu, req->hdr.pcifunc); } +int rvu_mbox_handler_cpt_lf_reset(struct rvu *rvu, struct cpt_lf_rst_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int cptlf, blkaddr, ret; + u16 actual_slot; + u64 ctl, ctl2; + + blkaddr = rvu_get_blkaddr_from_slot(rvu, BLKTYPE_CPT, pcifunc, + req->slot, &actual_slot); + if (blkaddr < 0) + return CPT_AF_ERR_LF_INVALID; + + block = &rvu->hw->block[blkaddr]; + + cptlf = rvu_get_lf(rvu, block, pcifunc, actual_slot); + if (cptlf < 0) + return CPT_AF_ERR_LF_INVALID; + ctl = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf)); + ctl2 = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf)); + + ret = rvu_lf_reset(rvu, block, cptlf); + if (ret) + dev_err(rvu->dev, "Failed to reset blkaddr %d LF%d\n", + block->addr, cptlf); + + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), ctl); + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), ctl2); + + return 0; +} + static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req; -- GitLab From 8b1140c5808b0ad694984357676c0f569590bcec Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:51 +0530 Subject: [PATCH 1164/2290] octeontx2-af: optimize cpt pf identification [ Upstream commit 9adb04ff62f51265002c2c83e718bcf459e06e48 ] Optimize CPT PF identification in mbox handling for faster mbox response by doing it at AF driver probe instead of every mbox message. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 8 ++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 13 ++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index d88d86bf07b03..8f5b7d14e3f7c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1164,8 +1164,16 @@ cpt: goto nix_err; } + err = rvu_cpt_init(rvu); + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize cpt\n", __func__); + goto mcs_err; + } + return 0; +mcs_err: + rvu_mcs_exit(rvu); nix_err: rvu_nix_freemem(rvu); npa_err: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 0b76dfa979d4e..e1760f9298b17 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -506,6 +506,7 @@ struct rvu { struct ptp *ptp; int mcs_blk_cnt; + int cpt_pf_num; #ifdef CONFIG_DEBUG_FS struct rvu_debugfs rvu_dbg; @@ -872,6 +873,7 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu); int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot); int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); +int rvu_cpt_init(struct rvu *rvu); #define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) #define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 1cd34914cb86b..923af460db296 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -340,7 +340,7 @@ static int get_cpt_pf_num(struct rvu *rvu) static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; if (rvu_get_pf(pcifunc) != cpt_pf_num) return false; @@ -352,7 +352,7 @@ static bool is_cpt_pf(struct rvu *rvu, u16 pcifunc) static bool is_cpt_vf(struct rvu *rvu, u16 pcifunc) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; if (rvu_get_pf(pcifunc) != cpt_pf_num) return false; @@ -1023,7 +1023,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int s static int cpt_inline_inb_lf_cmd_send(struct rvu *rvu, int blkaddr, int nix_blkaddr) { - int cpt_pf_num = get_cpt_pf_num(rvu); + int cpt_pf_num = rvu->cpt_pf_num; struct cpt_inst_lmtst_req *req; dma_addr_t res_daddr; int timeout = 3000; @@ -1167,3 +1167,10 @@ unlock: return 0; } + +int rvu_cpt_init(struct rvu *rvu) +{ + /* Retrieve CPT PF number */ + rvu->cpt_pf_num = get_cpt_pf_num(rvu); + return 0; +} -- GitLab From 8a231bd4d6cb774e3b903ec81fab9b6ec08039b1 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Jan 2023 17:33:54 +0530 Subject: [PATCH 1165/2290] octeontx2-af: add mbox to return CPT_AF_FLT_INT info [ Upstream commit 8299ffe3dc3dc9ac2bd60e3a8332008f03156aca ] CPT HW would trigger the CPT AF FLT interrupt when CPT engines hits some uncorrectable errors and AF is the one which receives the interrupt and recovers the engines. This patch adds a mailbox for CPT VFs to request for CPT faulted and recovered engines info. Signed-off-by: Srujana Challa Signed-off-by: David S. Miller Stable-dep-of: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 17 +++++++++ .../net/ethernet/marvell/octeontx2/af/rvu.h | 4 +++ .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 35 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 5decd1919de03..bbb6658420f1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -197,6 +197,8 @@ M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ msg_rsp) \ M(CPT_CTX_CACHE_SYNC, 0xA07, cpt_ctx_cache_sync, msg_req, msg_rsp) \ M(CPT_LF_RESET, 0xA08, cpt_lf_reset, cpt_lf_rst_req, msg_rsp) \ +M(CPT_FLT_ENG_INFO, 0xA09, cpt_flt_eng_info, cpt_flt_eng_info_req, \ + cpt_flt_eng_info_rsp) \ /* SDP mbox IDs (range 0x1000 - 0x11FF) */ \ M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \ M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \ @@ -1710,6 +1712,21 @@ struct cpt_lf_rst_req { u32 rsvd; }; +/* Mailbox message format to request for CPT faulted engines */ +struct cpt_flt_eng_info_req { + struct mbox_msghdr hdr; + int blkaddr; + bool reset; + u32 rsvd; +}; + +struct cpt_flt_eng_info_rsp { + struct mbox_msghdr hdr; + u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU]; + u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU]; + u64 rsvd; +}; + struct sdp_node_info { /* Node to which this PF belons to */ u8 node_id; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index e1760f9298b17..6a39006c334d7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -109,6 +109,8 @@ struct rvu_block { u64 lfreset_reg; unsigned char name[NAME_SIZE]; struct rvu *rvu; + u64 cpt_flt_eng_map[3]; + u64 cpt_rcvrd_eng_map[3]; }; struct nix_mcast { @@ -521,6 +523,8 @@ struct rvu { struct list_head mcs_intrq_head; /* mcs interrupt queue lock */ spinlock_t mcs_intrq_lock; + /* CPT interrupt lock */ + spinlock_t cpt_intr_lock; }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 923af460db296..6fb02b93c1718 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -70,6 +70,14 @@ static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL2(eng), grp); rvu_write64(rvu, blkaddr, CPT_AF_EXEX_CTL(eng), val | 1ULL); + + spin_lock(&rvu->cpt_intr_lock); + block->cpt_flt_eng_map[vec] |= BIT_ULL(i); + val = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_STS(eng)); + val = val & 0x3; + if (val == 0x1 || val == 0x2) + block->cpt_rcvrd_eng_map[vec] |= BIT_ULL(i); + spin_unlock(&rvu->cpt_intr_lock); } rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(vec), reg); @@ -884,6 +892,31 @@ int rvu_mbox_handler_cpt_lf_reset(struct rvu *rvu, struct cpt_lf_rst_req *req, return 0; } +int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_req *req, + struct cpt_flt_eng_info_rsp *rsp) +{ + struct rvu_block *block; + unsigned long flags; + int blkaddr, vec; + + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; + + block = &rvu->hw->block[blkaddr]; + for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) { + spin_lock_irqsave(&rvu->cpt_intr_lock, flags); + rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec]; + rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec]; + if (req->reset) { + block->cpt_flt_eng_map[vec] = 0x0; + block->cpt_rcvrd_eng_map[vec] = 0x0; + } + spin_unlock_irqrestore(&rvu->cpt_intr_lock, flags); + } + return 0; +} + static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req; @@ -1172,5 +1205,7 @@ int rvu_cpt_init(struct rvu *rvu) { /* Retrieve CPT PF number */ rvu->cpt_pf_num = get_cpt_pf_num(rvu); + spin_lock_init(&rvu->cpt_intr_lock); + return 0; } -- GitLab From a64cc7599ecec984997b5b3b3dfd2e3c75493c54 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:54 +0530 Subject: [PATCH 1166/2290] octeontx2: Detect the mbox up or down message via register [ Upstream commit a88e0f936ba9a301c78f6eacfd38737d003c130b ] A single line of interrupt is used to receive up notifications and down reply messages from AF to PF (similarly from PF to its VF). PF acts as bridge and forwards VF messages to AF and sends respsones back from AF to VF. When an async event like link event is received by up message when PF is in middle of forwarding VF message then mailbox errors occur because PF state machine is corrupted. Since VF is a separate driver or VF driver can be in a VM it is not possible to serialize from the start of communication at VF. Hence to differentiate between type of messages at PF this patch makes sender to set mbox data register with distinct values for up and down messages. Sender also checks whether previous interrupt is received before triggering current interrupt by waiting for mailbox data register to become zero. Fixes: 5a6d7c9daef3 ("octeontx2-pf: Mailbox communication with AF") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mbox.c | 43 ++++++- .../net/ethernet/marvell/octeontx2/af/mbox.h | 6 + .../marvell/octeontx2/af/mcs_rvu_if.c | 17 ++- .../net/ethernet/marvell/octeontx2/af/rvu.c | 14 ++- .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 20 ++-- .../marvell/octeontx2/nic/otx2_common.h | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 113 ++++++++++++------ .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 71 ++++++----- 9 files changed, 205 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 9690ac01f02c8..7d741e3ba8c51 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -214,11 +214,12 @@ int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid) } EXPORT_SYMBOL(otx2_mbox_busy_poll_for_rsp); -void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) +static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) { struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; void *hw_mbase = mdev->hwbase; + u64 intr_val; tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; @@ -254,14 +255,52 @@ void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) spin_unlock(&mdev->mbox_lock); + /* Check if interrupt pending */ + intr_val = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + intr_val |= data; /* The interrupt should be fired after num_msgs is written * to the shared memory */ - writeq(1, (void __iomem *)mbox->reg_base + + writeq(intr_val, (void __iomem *)mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift))); } + +void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) +{ + otx2_mbox_msg_send_data(mbox, devid, MBOX_DOWN_MSG); +} EXPORT_SYMBOL(otx2_mbox_msg_send); +void otx2_mbox_msg_send_up(struct otx2_mbox *mbox, int devid) +{ + otx2_mbox_msg_send_data(mbox, devid, MBOX_UP_MSG); +} +EXPORT_SYMBOL(otx2_mbox_msg_send_up); + +bool otx2_mbox_wait_for_zero(struct otx2_mbox *mbox, int devid) +{ + u64 data; + + data = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + /* If data is non-zero wait for ~1ms and return to caller + * whether data has changed to zero or not after the wait. + */ + if (!data) + return true; + + usleep_range(950, 1000); + + data = readq((void __iomem *)mbox->reg_base + + (mbox->trigger | (devid << mbox->tr_shift))); + + return data == 0; +} +EXPORT_SYMBOL(otx2_mbox_wait_for_zero); + struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid, int size, int size_rsp) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index bbb6658420f1d..be70269e91684 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -16,6 +16,9 @@ #define MBOX_SIZE SZ_64K +#define MBOX_DOWN_MSG 1 +#define MBOX_UP_MSG 2 + /* AF/PF: PF initiated, PF/VF VF initiated */ #define MBOX_DOWN_RX_START 0 #define MBOX_DOWN_RX_SIZE (46 * SZ_1K) @@ -101,6 +104,7 @@ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void __force **hwbase, struct pci_dev *pdev, void __force *reg_base, int direction, int ndevs, unsigned long *bmap); void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid); +void otx2_mbox_msg_send_up(struct otx2_mbox *mbox, int devid); int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid); int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid); struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid, @@ -118,6 +122,8 @@ static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox, return otx2_mbox_alloc_msg_rsp(mbox, devid, size, 0); } +bool otx2_mbox_wait_for_zero(struct otx2_mbox *mbox, int devid); + /* Mailbox message types */ #define MBOX_MSG_MASK 0xFFFF #define MBOX_MSG_INVALID 0xFFFE diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index dfd23580e3b8e..d39d86e694ccf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -121,13 +121,17 @@ int mcs_add_intr_wq_entry(struct mcs *mcs, struct mcs_intr_event *event) static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) { struct mcs_intr_info *req; - int err, pf; + int pf; pf = rvu_get_pf(event->pcifunc); + mutex_lock(&rvu->mbox_lock); + req = otx2_mbox_alloc_msg_mcs_intr_notify(rvu, pf); - if (!req) + if (!req) { + mutex_unlock(&rvu->mbox_lock); return -ENOMEM; + } req->mcs_id = event->mcs_id; req->intr_mask = event->intr_mask; @@ -135,10 +139,11 @@ static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) req->hdr.pcifunc = event->pcifunc; req->lmac_id = event->lmac_id; - otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pf); - err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); - if (err) - dev_warn(rvu->dev, "MCS notification to pf %d failed\n", pf); + otx2_mbox_wait_for_zero(&rvu->afpf_wq_info.mbox_up, pf); + + otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + + mutex_unlock(&rvu->mbox_lock); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 8f5b7d14e3f7c..59e6442ddf4a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2114,7 +2114,7 @@ bad_message: } } -static void __rvu_mbox_handler(struct rvu_work *mwork, int type) +static void __rvu_mbox_handler(struct rvu_work *mwork, int type, bool poll) { struct rvu *rvu = mwork->rvu; int offset, err, id, devid; @@ -2181,6 +2181,9 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type) } mw->mbox_wrk[devid].num_msgs = 0; + if (poll) + otx2_mbox_wait_for_zero(mbox, devid); + /* Send mbox responses to VF/PF */ otx2_mbox_msg_send(mbox, devid); } @@ -2188,15 +2191,18 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type) static inline void rvu_afpf_mbox_handler(struct work_struct *work) { struct rvu_work *mwork = container_of(work, struct rvu_work, work); + struct rvu *rvu = mwork->rvu; - __rvu_mbox_handler(mwork, TYPE_AFPF); + mutex_lock(&rvu->mbox_lock); + __rvu_mbox_handler(mwork, TYPE_AFPF, true); + mutex_unlock(&rvu->mbox_lock); } static inline void rvu_afvf_mbox_handler(struct work_struct *work) { struct rvu_work *mwork = container_of(work, struct rvu_work, work); - __rvu_mbox_handler(mwork, TYPE_AFVF); + __rvu_mbox_handler(mwork, TYPE_AFVF, false); } static void __rvu_mbox_up_handler(struct rvu_work *mwork, int type) @@ -2371,6 +2377,8 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, } } + mutex_init(&rvu->mbox_lock); + mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL); if (!mbox_regions) { err = -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 6a39006c334d7..a3ae21398ca74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -525,6 +525,8 @@ struct rvu { spinlock_t mcs_intrq_lock; /* CPT interrupt lock */ spinlock_t cpt_intr_lock; + + struct mutex mbox_lock; /* Serialize mbox up and down msgs */ }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index bcb4385d0621c..d1e6b12ecfa70 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -232,7 +232,7 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) struct cgx_link_user_info *linfo; struct cgx_link_info_msg *msg; unsigned long pfmap; - int err, pfid; + int pfid; linfo = &event->link_uinfo; pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); @@ -250,16 +250,22 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) continue; } + mutex_lock(&rvu->mbox_lock); + /* Send mbox message to PF */ msg = otx2_mbox_alloc_msg_cgx_link_event(rvu, pfid); - if (!msg) + if (!msg) { + mutex_unlock(&rvu->mbox_lock); continue; + } + msg->link_info = *linfo; - otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pfid); - err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid); - if (err) - dev_warn(rvu->dev, "notification to pf %d failed\n", - pfid); + + otx2_mbox_wait_for_zero(&rvu->afpf_wq_info.mbox_up, pfid); + + otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pfid); + + mutex_unlock(&rvu->mbox_lock); } while (pfmap); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 44950c2542bb7..c15d1864a6371 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -785,7 +785,7 @@ static inline int otx2_sync_mbox_up_msg(struct mbox *mbox, int devid) if (!otx2_mbox_nonempty(&mbox->mbox_up, devid)) return 0; - otx2_mbox_msg_send(&mbox->mbox_up, devid); + otx2_mbox_msg_send_up(&mbox->mbox_up, devid); err = otx2_mbox_wait_for_rsp(&mbox->mbox_up, devid); if (err) return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a2d8ac6204054..05ee55022b92c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -292,8 +292,8 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) return 0; } -static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, - int first, int mdevs, u64 intr, int type) +static void otx2_queue_vf_work(struct mbox *mw, struct workqueue_struct *mbox_wq, + int first, int mdevs, u64 intr) { struct otx2_mbox_dev *mdev; struct otx2_mbox *mbox; @@ -307,40 +307,26 @@ static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, mbox = &mw->mbox; mdev = &mbox->dev[i]; - if (type == TYPE_PFAF) - otx2_sync_mbox_bbuf(mbox, i); hdr = mdev->mbase + mbox->rx_start; /* The hdr->num_msgs is set to zero immediately in the interrupt - * handler to ensure that it holds a correct value next time - * when the interrupt handler is called. - * pf->mbox.num_msgs holds the data for use in pfaf_mbox_handler - * pf>mbox.up_num_msgs holds the data for use in - * pfaf_mbox_up_handler. + * handler to ensure that it holds a correct value next time + * when the interrupt handler is called. pf->mw[i].num_msgs + * holds the data for use in otx2_pfvf_mbox_handler and + * pf->mw[i].up_num_msgs holds the data for use in + * otx2_pfvf_mbox_up_handler. */ if (hdr->num_msgs) { mw[i].num_msgs = hdr->num_msgs; hdr->num_msgs = 0; - if (type == TYPE_PFAF) - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), - sizeof(u64))); - queue_work(mbox_wq, &mw[i].mbox_wrk); } mbox = &mw->mbox_up; mdev = &mbox->dev[i]; - if (type == TYPE_PFAF) - otx2_sync_mbox_bbuf(mbox, i); hdr = mdev->mbase + mbox->rx_start; if (hdr->num_msgs) { mw[i].up_num_msgs = hdr->num_msgs; hdr->num_msgs = 0; - if (type == TYPE_PFAF) - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), - sizeof(u64))); - queue_work(mbox_wq, &mw[i].mbox_up_wrk); } } @@ -356,8 +342,10 @@ static void otx2_forward_msg_pfvf(struct otx2_mbox_dev *mdev, /* Msgs are already copied, trigger VF's mbox irq */ smp_wmb(); + otx2_mbox_wait_for_zero(pfvf_mbox, devid); + offset = pfvf_mbox->trigger | (devid << pfvf_mbox->tr_shift); - writeq(1, (void __iomem *)pfvf_mbox->reg_base + offset); + writeq(MBOX_DOWN_MSG, (void __iomem *)pfvf_mbox->reg_base + offset); /* Restore VF's mbox bounce buffer region address */ src_mdev->mbase = bbuf_base; @@ -547,7 +535,7 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) end: offset = mbox->rx_start + msg->next_msgoff; if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1)) - __otx2_mbox_reset(mbox, 0); + __otx2_mbox_reset(mbox, vf_idx); mdev->msgs_acked++; } } @@ -564,8 +552,7 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) if (vfs > 64) { intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(1)); otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr); - otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr, - TYPE_PFVF); + otx2_queue_vf_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr); if (intr) trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); vfs = 64; @@ -574,7 +561,7 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0)); otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(0), intr); - otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); + otx2_queue_vf_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr); if (intr) trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); @@ -822,20 +809,22 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) struct mbox *af_mbox; struct otx2_nic *pf; int offset, id; + u16 num_msgs; af_mbox = container_of(work, struct mbox, mbox_wrk); mbox = &af_mbox->mbox; mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + num_msgs = rsp_hdr->num_msgs; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); pf = af_mbox->pfvf; - for (id = 0; id < af_mbox->num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); offset = mbox->rx_start + msg->next_msgoff; - if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + if (mdev->msgs_acked == (num_msgs - 1)) __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } @@ -946,12 +935,14 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) int offset, id, devid = 0; struct mbox_hdr *rsp_hdr; struct mbox_msghdr *msg; + u16 num_msgs; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + num_msgs = rsp_hdr->num_msgs; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < af_mbox->up_num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); devid = msg->pcifunc & RVU_PFVF_FUNC_MASK; @@ -960,10 +951,11 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) otx2_process_mbox_msg_up(pf, msg); offset = mbox->rx_start + msg->next_msgoff; } - if (devid) { + /* Forward to VF iff VFs are really present */ + if (devid && pci_num_vf(pf->pdev)) { otx2_forward_vf_mbox_msgs(pf, &pf->mbox.mbox_up, MBOX_DIR_PFVF_UP, devid - 1, - af_mbox->up_num_msgs); + num_msgs); return; } @@ -973,16 +965,49 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) { struct otx2_nic *pf = (struct otx2_nic *)pf_irq; - struct mbox *mbox; + struct mbox *mw = &pf->mbox; + struct otx2_mbox_dev *mdev; + struct otx2_mbox *mbox; + struct mbox_hdr *hdr; + u64 mbox_data; /* Clear the IRQ */ otx2_write64(pf, RVU_PF_INT, BIT_ULL(0)); - mbox = &pf->mbox; - trace_otx2_msg_interrupt(mbox->mbox.pdev, "AF to PF", BIT_ULL(0)); + mbox_data = otx2_read64(pf, RVU_PF_PFAF_MBOX0); + + if (mbox_data & MBOX_UP_MSG) { + mbox_data &= ~MBOX_UP_MSG; + otx2_write64(pf, RVU_PF_PFAF_MBOX0, mbox_data); + + mbox = &mw->mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(pf->mbox_wq, &mw->mbox_up_wrk); + + trace_otx2_msg_interrupt(pf->pdev, "UP message from AF to PF", + BIT_ULL(0)); + } + + if (mbox_data & MBOX_DOWN_MSG) { + mbox_data &= ~MBOX_DOWN_MSG; + otx2_write64(pf, RVU_PF_PFAF_MBOX0, mbox_data); + + mbox = &mw->mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(pf->mbox_wq, &mw->mbox_wrk); - otx2_queue_work(mbox, pf->mbox_wq, 0, 1, 1, TYPE_PFAF); + trace_otx2_msg_interrupt(pf->pdev, "DOWN reply from AF to PF", + BIT_ULL(0)); + } return IRQ_HANDLED; } @@ -3030,6 +3055,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) struct otx2_vf_config *config; struct cgx_link_info_msg *req; struct mbox_msghdr *msghdr; + struct delayed_work *dwork; struct otx2_nic *pf; int vf_idx; @@ -3038,10 +3064,21 @@ static void otx2_vf_link_event_task(struct work_struct *work) vf_idx = config - config->pf->vf_configs; pf = config->pf; + mutex_lock(&pf->mbox.lock); + + dwork = &config->link_event_work; + + if (!otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx)) { + schedule_delayed_work(dwork, msecs_to_jiffies(100)); + mutex_unlock(&pf->mbox.lock); + return; + } + msghdr = otx2_mbox_alloc_msg_rsp(&pf->mbox_pfvf[0].mbox_up, vf_idx, sizeof(*req), sizeof(struct msg_rsp)); if (!msghdr) { dev_err(pf->dev, "Failed to create VF%d link event\n", vf_idx); + mutex_unlock(&pf->mbox.lock); return; } @@ -3050,7 +3087,11 @@ static void otx2_vf_link_event_task(struct work_struct *work) req->hdr.sig = OTX2_MBOX_REQ_SIG; memcpy(&req->link_info, &pf->linfo, sizeof(req->link_info)); - otx2_sync_mbox_up_msg(&pf->mbox_pfvf[0], vf_idx); + otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); + + otx2_mbox_msg_send_up(&pf->mbox_pfvf[0].mbox_up, vf_idx); + + mutex_unlock(&pf->mbox.lock); } static int otx2_sriov_enable(struct pci_dev *pdev, int numvfs) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 404855bccb4b6..68fef947ccced 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -89,16 +89,20 @@ static void otx2vf_vfaf_mbox_handler(struct work_struct *work) struct otx2_mbox *mbox; struct mbox *af_mbox; int offset, id; + u16 num_msgs; af_mbox = container_of(work, struct mbox, mbox_wrk); mbox = &af_mbox->mbox; mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (af_mbox->num_msgs == 0) + num_msgs = rsp_hdr->num_msgs; + + if (num_msgs == 0) return; + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < af_mbox->num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg); offset = mbox->rx_start + msg->next_msgoff; @@ -151,6 +155,7 @@ static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) struct mbox *vf_mbox; struct otx2_nic *vf; int offset, id; + u16 num_msgs; vf_mbox = container_of(work, struct mbox, mbox_up_wrk); vf = vf_mbox->pfvf; @@ -158,12 +163,14 @@ static void otx2vf_vfaf_mbox_up_handler(struct work_struct *work) mdev = &mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (vf_mbox->up_num_msgs == 0) + num_msgs = rsp_hdr->num_msgs; + + if (num_msgs == 0) return; offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); - for (id = 0; id < vf_mbox->up_num_msgs; id++) { + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_mbox_msg_up(vf, msg); offset = mbox->rx_start + msg->next_msgoff; @@ -178,40 +185,48 @@ static irqreturn_t otx2vf_vfaf_mbox_intr_handler(int irq, void *vf_irq) struct otx2_mbox_dev *mdev; struct otx2_mbox *mbox; struct mbox_hdr *hdr; + u64 mbox_data; /* Clear the IRQ */ otx2_write64(vf, RVU_VF_INT, BIT_ULL(0)); + mbox_data = otx2_read64(vf, RVU_VF_VFPF_MBOX0); + /* Read latest mbox data */ smp_rmb(); - /* Check for PF => VF response messages */ - mbox = &vf->mbox.mbox; - mdev = &mbox->dev[0]; - otx2_sync_mbox_bbuf(mbox, 0); + if (mbox_data & MBOX_DOWN_MSG) { + mbox_data &= ~MBOX_DOWN_MSG; + otx2_write64(vf, RVU_VF_VFPF_MBOX0, mbox_data); + + /* Check for PF => VF response messages */ + mbox = &vf->mbox.mbox; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); - trace_otx2_msg_interrupt(mbox->pdev, "PF to VF", BIT_ULL(0)); + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); - hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (hdr->num_msgs) { - vf->mbox.num_msgs = hdr->num_msgs; - hdr->num_msgs = 0; - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); - queue_work(vf->mbox_wq, &vf->mbox.mbox_wrk); + trace_otx2_msg_interrupt(mbox->pdev, "DOWN reply from PF to VF", + BIT_ULL(0)); } - /* Check for PF => VF notification messages */ - mbox = &vf->mbox.mbox_up; - mdev = &mbox->dev[0]; - otx2_sync_mbox_bbuf(mbox, 0); - - hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - if (hdr->num_msgs) { - vf->mbox.up_num_msgs = hdr->num_msgs; - hdr->num_msgs = 0; - memset(mbox->hwbase + mbox->rx_start, 0, - ALIGN(sizeof(struct mbox_hdr), sizeof(u64))); - queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + + if (mbox_data & MBOX_UP_MSG) { + mbox_data &= ~MBOX_UP_MSG; + otx2_write64(vf, RVU_VF_VFPF_MBOX0, mbox_data); + + /* Check for PF => VF notification messages */ + mbox = &vf->mbox.mbox_up; + mdev = &mbox->dev[0]; + otx2_sync_mbox_bbuf(mbox, 0); + + hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + if (hdr->num_msgs) + queue_work(vf->mbox_wq, &vf->mbox.mbox_up_wrk); + + trace_otx2_msg_interrupt(mbox->pdev, "UP message from PF to VF", + BIT_ULL(0)); } return IRQ_HANDLED; -- GitLab From e545e4b1c1c1c331a1f8e4e56e8cc1d2a82bd625 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 May 2023 13:52:28 -1000 Subject: [PATCH 1167/2290] net: octeontx2: Use alloc_ordered_workqueue() to create ordered workqueues [ Upstream commit 289f97467480266f9bd8cac7f1e05a478d523f79 ] BACKGROUND ========== When multiple work items are queued to a workqueue, their execution order doesn't match the queueing order. They may get executed in any order and simultaneously. When fully serialized execution - one by one in the queueing order - is needed, an ordered workqueue should be used which can be created with alloc_ordered_workqueue(). However, alloc_ordered_workqueue() was a later addition. Before it, an ordered workqueue could be obtained by creating an UNBOUND workqueue with @max_active==1. This originally was an implementation side-effect which was broken by 4c16bd327c74 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered"). Because there were users that depended on the ordered execution, 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") made workqueue allocation path to implicitly promote UNBOUND workqueues w/ @max_active==1 to ordered workqueues. While this has worked okay, overloading the UNBOUND allocation interface this way creates other issues. It's difficult to tell whether a given workqueue actually needs to be ordered and users that legitimately want a min concurrency level wq unexpectedly gets an ordered one instead. With planned UNBOUND workqueue updates to improve execution locality and more prevalence of chiplet designs which can benefit from such improvements, this isn't a state we wanna be in forever. This patch series audits all callsites that create an UNBOUND workqueue w/ @max_active==1 and converts them to alloc_ordered_workqueue() as necessary. WHAT TO LOOK FOR ================ The conversions are from alloc_workqueue(WQ_UNBOUND | flags, 1, args..) to alloc_ordered_workqueue(flags, args...) which don't cause any functional changes. If you know that fully ordered execution is not ncessary, please let me know. I'll drop the conversion and instead add a comment noting the fact to reduce confusion while conversion is in progress. If you aren't fully sure, it's completely fine to let the conversion through. The behavior will stay exactly the same and we can always reconsider later. As there are follow-up workqueue core changes, I'd really appreciate if the patch can be routed through the workqueue tree w/ your acks. Thanks. Signed-off-by: Tejun Heo Reviewed-by: Sunil Goutham Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Ratheesh Kannoth Cc: Srujana Challa Cc: Geetha sowjanya Cc: netdev@vger.kernel.org Stable-dep-of: 7558ce0d974c ("octeontx2-pf: Use default max_active works instead of one") Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 5 ++--- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 13 +++++-------- .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 5 ++--- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 59e6442ddf4a4..a7965b457bee9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -3055,9 +3055,8 @@ static int rvu_flr_init(struct rvu *rvu) cfg | BIT_ULL(22)); } - rvu->flr_wq = alloc_workqueue("rvu_afpf_flr", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - 1); + rvu->flr_wq = alloc_ordered_workqueue("rvu_afpf_flr", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!rvu->flr_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 05ee55022b92c..3f044b161e8bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -272,8 +272,7 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) { int vf; - pf->flr_wq = alloc_workqueue("otx2_pf_flr_wq", - WQ_UNBOUND | WQ_HIGHPRI, 1); + pf->flr_wq = alloc_ordered_workqueue("otx2_pf_flr_wq", WQ_HIGHPRI); if (!pf->flr_wq) return -ENOMEM; @@ -584,9 +583,8 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf) return -ENOMEM; - pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_pfvf_wq = alloc_ordered_workqueue("otx2_pfvf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -1088,9 +1086,8 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) int err; mbox->pfvf = pf; - pf->mbox_wq = alloc_workqueue("otx2_pfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_wq = alloc_ordered_workqueue("otx2_pfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 68fef947ccced..dcb8190de2407 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -308,9 +308,8 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) int err; mbox->pfvf = vf; - vf->mbox_wq = alloc_workqueue("otx2_vfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + vf->mbox_wq = alloc_ordered_workqueue("otx2_vfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!vf->mbox_wq) return -ENOMEM; -- GitLab From 53ae0f36690ca6a29f932c74721aa63c617fe692 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:56 +0530 Subject: [PATCH 1168/2290] octeontx2-pf: Use default max_active works instead of one [ Upstream commit 7558ce0d974ced1dc07edc1197f750fe28c52e57 ] Only one execution context for the workqueue used for PF and VFs mailbox communication is incorrect since multiple works are queued simultaneously by all the VFs and PF link UP messages. Hence use default number of execution contexts by passing zero as max_active to alloc_workqueue function. With this fix in place, modify UP messages also to wait until completion. Fixes: d424b6c02415 ("octeontx2-pf: Enable SRIOV and added VF mbox handling") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 3f044b161e8bf..a6c5f6a2dab07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -583,8 +583,9 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf) return -ENOMEM; - pf->mbox_pfvf_wq = alloc_ordered_workqueue("otx2_pfvf_mailbox", - WQ_HIGHPRI | WQ_MEM_RECLAIM); + pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 0); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -3086,7 +3087,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); - otx2_mbox_msg_send_up(&pf->mbox_pfvf[0].mbox_up, vf_idx); + otx2_sync_mbox_up_msg(&pf->mbox_pfvf[0], vf_idx); mutex_unlock(&pf->mbox.lock); } -- GitLab From 53e6709a2ff1a140549a3662a0c5ebf5c9c8d724 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:57 +0530 Subject: [PATCH 1169/2290] octeontx2-pf: Send UP messages to VF only when VF is up. [ Upstream commit dfcf6355f53b1796cf7fd50a4f27b18ee6a3497a ] When PF sending link status messages to VF, it is possible that by the time link_event_task work function is executed VF might have brought down. Hence before sending VF link status message check whether VF is up to receive it. Fixes: ad513ed938c9 ("octeontx2-vf: Link event notification support") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a6c5f6a2dab07..7e2c30927c312 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3062,6 +3062,9 @@ static void otx2_vf_link_event_task(struct work_struct *work) vf_idx = config - config->pf->vf_configs; pf = config->pf; + if (config->intf_down) + return; + mutex_lock(&pf->mbox.lock); dwork = &config->link_event_work; -- GitLab From 29d2550d79a8cbd31e0fbaa5c0e2a2efdc444e44 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 18 Mar 2024 14:59:58 +0530 Subject: [PATCH 1170/2290] octeontx2-af: Use separate handlers for interrupts [ Upstream commit 50e60de381c342008c0956fd762e1c26408f372c ] For PF to AF interrupt vector and VF to AF vector same interrupt handler is registered which is causing race condition. When two interrupts are raised to two CPUs at same time then two cores serve same event corrupting the data. Fixes: 7304ac4567bc ("octeontx2-af: Add mailbox IRQ and msg handlers") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index a7965b457bee9..a7034b47ed6c9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2528,10 +2528,9 @@ static void rvu_queue_work(struct mbox_wq_info *mw, int first, } } -static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) +static irqreturn_t rvu_mbox_pf_intr_handler(int irq, void *rvu_irq) { struct rvu *rvu = (struct rvu *)rvu_irq; - int vfs = rvu->vfs; u64 intr; intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT); @@ -2545,6 +2544,18 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) rvu_queue_work(&rvu->afpf_wq_info, 0, rvu->hw->total_pfs, intr); + return IRQ_HANDLED; +} + +static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) +{ + struct rvu *rvu = (struct rvu *)rvu_irq; + int vfs = rvu->vfs; + u64 intr; + + /* Sync with mbox memory region */ + rmb(); + /* Handle VF interrupts */ if (vfs > 64) { intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(1)); @@ -2881,7 +2892,7 @@ static int rvu_register_interrupts(struct rvu *rvu) /* Register mailbox interrupt handler */ sprintf(&rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], "RVUAF Mbox"); ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_MBOX), - rvu_mbox_intr_handler, 0, + rvu_mbox_pf_intr_handler, 0, &rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], rvu); if (ret) { dev_err(rvu->dev, -- GitLab From 5ad233dc731ab64cdc47b84a5c1f78fff6c024af Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Mar 2024 10:02:41 +0100 Subject: [PATCH 1171/2290] netfilter: nft_set_pipapo: release elements in clone only from destroy path [ Upstream commit b0e256f3dd2ba6532f37c5c22e07cb07a36031ee ] Clone already always provides a current view of the lookup table, use it to destroy the set, otherwise it is possible to destroy elements twice. This fix requires: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") which came after: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path"). Fixes: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index e1969209b3abb..58eca26162735 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2240,8 +2240,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(ctx, set, m); - for_each_possible_cpu(cpu) pipapo_free_scratch(m, cpu); free_percpu(m->scratch); @@ -2253,8 +2251,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->clone) { m = priv->clone; - if (priv->dirty) - nft_set_pipapo_match_destroy(ctx, set, m); + nft_set_pipapo_match_destroy(ctx, set, m); for_each_possible_cpu(cpu) pipapo_free_scratch(priv->clone, cpu); -- GitLab From 9683cb6c2c6c0f45537bf0b8868b5d38fcb63fc7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2024 18:51:38 +0100 Subject: [PATCH 1172/2290] netfilter: nf_tables: do not compare internal table flags on updates [ Upstream commit 4a0e7f2decbf9bd72461226f1f5f7dcc4b08f139 ] Restore skipping transaction if table update does not modify flags. Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d3ba947f43761..0a86c019a75de 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1205,7 +1205,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_MASK) return -EOPNOTSUPP; - if (flags == ctx->table->flags) + if (flags == (ctx->table->flags & NFT_TABLE_F_MASK)) return 0; if ((nft_table_has_owner(ctx->table) && -- GitLab From f21ddce5b8c4caab6375ff7d612ff15e19f6d270 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 19 Mar 2024 13:44:34 -0700 Subject: [PATCH 1173/2290] rcu: add a helper to report consolidated flavor QS [ Upstream commit 1a77557d48cff187a169c2aec01c0dd78a5e7e50 ] When under heavy load, network processing can run CPU-bound for many tens of seconds. Even in preemptible kernels (non-RT kernel), this can block RCU Tasks grace periods, which can cause trace-event removal to take more than a minute, which is unacceptably long. This commit therefore creates a new helper function that passes through both RCU and RCU-Tasks quiescent states every 100 milliseconds. This hard-coded value suffices for current workloads. Suggested-by: Paul E. McKenney Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Yan Zhai Reviewed-by: Paul E. McKenney Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/90431d46ee112d2b0af04dbfe936faaca11810a5.1710877680.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski Stable-dep-of: d6dbbb11247c ("net: report RCU QS on threaded NAPI repolling") Signed-off-by: Sasha Levin --- include/linux/rcupdate.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d2507168b9c7b..319698087d66a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -268,6 +268,37 @@ do { \ cond_resched(); \ } while (0) +/** + * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states + * @old_ts: jiffies at start of processing. + * + * This helper is for long-running softirq handlers, such as NAPI threads in + * networking. The caller should initialize the variable passed in as @old_ts + * at the beginning of the softirq handler. When invoked frequently, this macro + * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will + * provide both RCU and RCU-Tasks quiescent states. Note that this macro + * modifies its old_ts argument. + * + * Because regions of code that have disabled softirq act as RCU read-side + * critical sections, this macro should be invoked with softirq (and + * preemption) enabled. + * + * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would + * have more chance to invoke schedule() calls and provide necessary quiescent + * states. As a contrast, calling cond_resched() only won't achieve the same + * effect because cond_resched() does not provide RCU-Tasks quiescent states. + */ +#define rcu_softirq_qs_periodic(old_ts) \ +do { \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ + time_after(jiffies, (old_ts) + HZ / 10)) { \ + preempt_disable(); \ + rcu_softirq_qs(); \ + preempt_enable(); \ + (old_ts) = jiffies; \ + } \ +} while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. -- GitLab From 3890e7008c553fb582f14e01b64e84bc37959093 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 19 Mar 2024 13:44:37 -0700 Subject: [PATCH 1174/2290] net: report RCU QS on threaded NAPI repolling [ Upstream commit d6dbbb11247c71203785a2c9da474c36f4b19eae ] NAPI threads can keep polling packets under load. Currently it is only calling cond_resched() before repolling, but it is not sufficient to clear out the holdout of RCU tasks, which prevent BPF tracing programs from detaching for long period. This can be reproduced easily with following set up: ip netns add test1 ip netns add test2 ip -n test1 link add veth1 type veth peer name veth2 netns test2 ip -n test1 link set veth1 up ip -n test1 link set lo up ip -n test2 link set veth2 up ip -n test2 link set lo up ip -n test1 addr add 192.168.1.2/31 dev veth1 ip -n test1 addr add 1.1.1.1/32 dev lo ip -n test2 addr add 192.168.1.3/31 dev veth2 ip -n test2 addr add 2.2.2.2/31 dev lo ip -n test1 route add default via 192.168.1.3 ip -n test2 route add default via 192.168.1.2 for i in `seq 10 210`; do for j in `seq 10 210`; do ip netns exec test2 iptables -I INPUT -s 3.3.$i.$j -p udp --dport 5201 done done ip netns exec test2 ethtool -K veth2 gro on ip netns exec test2 bash -c 'echo 1 > /sys/class/net/veth2/threaded' ip netns exec test1 ethtool -K veth1 tso off Then run an iperf3 client/server and a bpftrace script can trigger it: ip netns exec test2 iperf3 -s -B 2.2.2.2 >/dev/null& ip netns exec test1 iperf3 -c 2.2.2.2 -B 1.1.1.1 -u -l 1500 -b 3g -t 100 >/dev/null& bpftrace -e 'kfunc:__napi_poll{@=count();} interval:s:1{exit();}' Report RCU quiescent states periodically will resolve the issue. Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Yan Zhai Acked-by: Paul E. McKenney Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/4c3b0d3f32d3b18949d75b18e5e1d9f13a24f025.1710877680.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 9a48a7e26cf46..65284eeec7de5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6645,6 +6645,8 @@ static int napi_threaded_poll(void *data) void *have; while (!napi_thread_wait(napi)) { + unsigned long last_qs = jiffies; + for (;;) { bool repoll = false; @@ -6659,6 +6661,7 @@ static int napi_threaded_poll(void *data) if (!repoll) break; + rcu_softirq_qs_periodic(last_qs); cond_resched(); } } -- GitLab From 5ff8f56c392bf5e728f5630820b6a42299a1fe23 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 19 Mar 2024 13:44:40 -0700 Subject: [PATCH 1175/2290] bpf: report RCU QS in cpumap kthread [ Upstream commit 00bf63122459e87193ee7f1bc6161c83a525569f ] When there are heavy load, cpumap kernel threads can be busy polling packets from redirect queues and block out RCU tasks from reaching quiescent states. It is insufficient to just call cond_resched() in such context. Periodically raise a consolidated RCU QS before cond_resched fixes the problem. Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP") Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Yan Zhai Acked-by: Paul E. McKenney Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/c17b9f1517e19d813da3ede5ed33ee18496bb5d8.1710877680.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- kernel/bpf/cpumap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 0508937048137..806a7c1b364b6 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -306,6 +306,7 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; + unsigned long last_qs = jiffies; complete(&rcpu->kthread_running); set_current_state(TASK_INTERRUPTIBLE); @@ -331,10 +332,12 @@ static int cpu_map_kthread_run(void *data) if (__ptr_ring_empty(rcpu->queue)) { schedule(); sched = 1; + last_qs = jiffies; } else { __set_current_state(TASK_RUNNING); } } else { + rcu_softirq_qs_periodic(last_qs); sched = cond_resched(); } -- GitLab From f1fa919ea59655f73cb3972264e157b8831ba546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 12:33:41 +0300 Subject: [PATCH 1176/2290] net: dsa: mt7530: fix link-local frames that ingress vlan filtering ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e8bf353577f382c7066c661fed41b2adc0fc7c40 ] Whether VLAN-aware or not, on every VID VLAN table entry that has the CPU port as a member of it, frames are set to egress the CPU port with the VLAN tag stacked. This is so that VLAN tags can be appended after hardware special tag (called DSA tag in the context of Linux drivers). For user ports on a VLAN-unaware bridge, frame ingressing the user port egresses CPU port with only the special tag. For user ports on a VLAN-aware bridge, frame ingressing the user port egresses CPU port with the special tag and the VLAN tag. This causes issues with link-local frames, specifically BPDUs, because the software expects to receive them VLAN-untagged. There are two options to make link-local frames egress untagged. Setting CONSISTENT or UNTAGGED on the EG_TAG bits on the relevant register. CONSISTENT means frames egress exactly as they ingress. That means egressing with the VLAN tag they had at ingress or egressing untagged if they ingressed untagged. Although link-local frames are not supposed to be transmitted VLAN-tagged, if they are done so, when egressing through a CPU port, the special tag field will be broken. BPDU egresses CPU port with VLAN tag egressing stacked, received on software: 00:01:25.104821 AF Unknown (382365846), length 106: | STAG | | VLAN | 0x0000: 0000 6c27 614d 4143 0001 0000 8100 0001 ..l'aMAC........ 0x0010: 0026 4242 0300 0000 0000 0000 6c27 614d .&BB........l'aM 0x0020: 4143 0000 0000 0000 6c27 614d 4143 0000 AC......l'aMAC.. 0x0030: 0000 1400 0200 0f00 0000 0000 0000 0000 ................ BPDU egresses CPU port with VLAN tag egressing untagged, received on software: 00:23:56.628708 AF Unknown (25215488), length 64: | STAG | 0x0000: 0000 6c27 614d 4143 0001 0000 0026 4242 ..l'aMAC.....&BB 0x0010: 0300 0000 0000 0000 6c27 614d 4143 0000 ........l'aMAC.. 0x0020: 0000 0000 6c27 614d 4143 0000 0000 1400 ....l'aMAC...... 0x0030: 0200 0f00 0000 0000 0000 0000 ............ BPDU egresses CPU port with VLAN tag egressing tagged, received on software: 00:01:34.311963 AF Unknown (25215488), length 64: | Mess | 0x0000: 0000 6c27 614d 4143 0001 0001 0026 4242 ..l'aMAC.....&BB 0x0010: 0300 0000 0000 0000 6c27 614d 4143 0000 ........l'aMAC.. 0x0020: 0000 0000 6c27 614d 4143 0000 0000 1400 ....l'aMAC...... 0x0030: 0200 0f00 0000 0000 0000 0000 ............ To prevent confusing the software, force the frame to egress UNTAGGED instead of CONSISTENT. This way, frames can't possibly be received TAGGED by software which would have the special tag field broken. VLAN Tag Egress Procedure For all frames, one of these options set the earliest in this order will apply to the frame: - EG_TAG in certain registers for certain frames. This will apply to frame with matching MAC DA or EtherType. - EG_TAG in the address table. This will apply to frame at its incoming port. - EG_TAG in the PVC register. This will apply to frame at its incoming port. - EG_CON and [EG_TAG per port] in the VLAN table. This will apply to frame at its outgoing port. - EG_TAG in the PCR register. This will apply to frame at its outgoing port. EG_TAG in certain registers for certain frames: PPPoE Discovery_ARP/RARP: PPP_EG_TAG and ARP_EG_TAG in the APC register. IGMP_MLD: IGMP_EG_TAG and MLD_EG_TAG in the IMC register. BPDU and PAE: BPDU_EG_TAG and PAE_EG_TAG in the BPC register. REV_01 and REV_02: R01_EG_TAG and R02_EG_TAG in the RGAC1 register. REV_03 and REV_0E: R03_EG_TAG and R0E_EG_TAG in the RGAC2 register. REV_10 and REV_20: R10_EG_TAG and R20_EG_TAG in the RGAC3 register. REV_21 and REV_UN: R21_EG_TAG and RUN_EG_TAG in the RGAC4 register. With this change, it can be observed that a bridge interface with stp_state and vlan_filtering enabled will properly block ports now. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 23 +++++++++++++++-------- drivers/net/dsa/mt7530.h | 9 ++++++++- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 80b346d4d990f..86c410f9fef8c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1001,16 +1001,23 @@ unlock_exit: static void mt753x_trap_frames(struct mt7530_priv *priv) { - /* Trap BPDUs to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, + /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them + * VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_BPDU_CPU_ONLY); - /* Trap 802.1X PAE frames to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK, - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY)); - - /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK, + /* Trap LLDP frames with :0E MAC DA to the CPU port(s) and egress them + * VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK, + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); } diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 6202b0f8c3f34..a5b864fd7d60c 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -63,12 +63,18 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 -#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) #define MT753X_PAE_PORT_FW(x) FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x) +#define MT753X_BPDU_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_BPDU_EG_TAG(x) FIELD_PREP(MT753X_BPDU_EG_TAG_MASK, x) +#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) @@ -251,6 +257,7 @@ enum mt7530_port_mode { enum mt7530_vlan_port_eg_tag { MT7530_VLAN_EG_DISABLED = 0, MT7530_VLAN_EG_CONSISTENT = 1, + MT7530_VLAN_EG_UNTAGGED = 4, }; enum mt7530_vlan_port_attr { -- GitLab From 86c0c154a759f2af9612a04bdf29110f02dce956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 12:33:42 +0300 Subject: [PATCH 1177/2290] net: dsa: mt7530: fix handling of all link-local frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 69ddba9d170bdaee1dc0eb4ced38d7e4bb7b92af ] Currently, the MT753X switches treat frames with :01-0D and :0F MAC DAs as regular multicast frames, therefore flooding them to user ports. On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA must only be propagated to C-VLAN and MAC Bridge components. That means VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, these frames are supposed to be processed by the CPU (software). So we make the switch only forward them to the CPU port. And if received from a CPU port, forward to a single port. The software is responsible of making the switch conform to the latter by setting a single port as destination port on the special tag. This switch intellectual property cannot conform to this part of the standard fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC DAs, it also includes :22-FF which the scope of propagation is not supposed to be restricted for these MAC DAs. Set frames with :01-03 MAC DAs to be trapped to the CPU port(s). Add a comment for the remaining MAC DAs. Note that the ingress port must have a PVID assigned to it for the switch to forward untagged frames. A PVID is set by default on VLAN-aware and VLAN-unaware ports. However, when the network interface that pertains to the ingress port is attached to a vlan_filtering enabled bridge, the user can remove the PVID assignment from it which would prevent the link-local frames from being trapped to the CPU port. I am yet to see a way to forward link-local frames while preventing other untagged frames from being forwarded too. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 37 +++++++++++++++++++++++++++++++++---- drivers/net/dsa/mt7530.h | 13 +++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 86c410f9fef8c..07065c1af55e4 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -998,6 +998,21 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } +/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std + * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA + * must only be propagated to C-VLAN and MAC Bridge components. That means + * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, + * these frames are supposed to be processed by the CPU (software). So we make + * the switch only forward them to the CPU port. And if received from a CPU + * port, forward to a single port. The software is responsible of making the + * switch conform to the latter by setting a single port as destination port on + * the special tag. + * + * This switch intellectual property cannot conform to this part of the standard + * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC + * DAs, it also includes :22-FF which the scope of propagation is not supposed + * to be restricted for these MAC DAs. + */ static void mt753x_trap_frames(struct mt7530_priv *priv) { @@ -1012,13 +1027,27 @@ mt753x_trap_frames(struct mt7530_priv *priv) MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | MT753X_BPDU_CPU_ONLY); - /* Trap LLDP frames with :0E MAC DA to the CPU port(s) and egress them - * VLAN-untagged. + /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress + * them VLAN-untagged. + */ + mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | + MT753X_R01_PORT_FW_MASK, + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); + + /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress + * them VLAN-untagged. */ mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK, + MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | + MT753X_R03_PORT_FW_MASK, MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index a5b864fd7d60c..fa2afa67ceb07 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -71,12 +71,25 @@ enum mt753x_id { #define MT753X_BPDU_EG_TAG(x) FIELD_PREP(MT753X_BPDU_EG_TAG_MASK, x) #define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +/* Register for :01 and :02 MAC DA frame control */ +#define MT753X_RGAC1 0x28 +#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) +#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) +#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) +#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) +#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) + /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) +#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) +#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) enum mt753x_bpdu_port_fw { MT753X_BPDU_FOLLOW_MFC, -- GitLab From 766ec94cc57492eab97cbbf1595bd516ab0cb0e4 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Thu, 21 Mar 2024 15:08:57 +0800 Subject: [PATCH 1178/2290] spi: spi-mt65xx: Fix NULL pointer access in interrupt handler [ Upstream commit a20ad45008a7c82f1184dc6dee280096009ece55 ] The TX buffer in spi_transfer can be a NULL pointer, so the interrupt handler may end up writing to the invalid memory and cause crashes. Add a check to trans->tx_buf before using it. Fixes: 1ce24864bff4 ("spi: mediatek: Only do dma for 4-byte aligned buffers") Signed-off-by: Fei Shao Reviewed-by: AngeloGioacchino Del Regno Link: https://msgid.link/r/20240321070942.1587146-2-fshao@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-mt65xx.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6e95efb50acbc..f9ec8742917a6 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -787,17 +787,19 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len); mtk_spi_setup_packet(master); - cnt = mdata->xfer_len / 4; - iowrite32_rep(mdata->base + SPI_TX_DATA_REG, - trans->tx_buf + mdata->num_xfered, cnt); + if (trans->tx_buf) { + cnt = mdata->xfer_len / 4; + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, + trans->tx_buf + mdata->num_xfered, cnt); - remainder = mdata->xfer_len % 4; - if (remainder > 0) { - reg_val = 0; - memcpy(®_val, - trans->tx_buf + (cnt * 4) + mdata->num_xfered, - remainder); - writel(reg_val, mdata->base + SPI_TX_DATA_REG); + remainder = mdata->xfer_len % 4; + if (remainder > 0) { + reg_val = 0; + memcpy(®_val, + trans->tx_buf + (cnt * 4) + mdata->num_xfered, + remainder); + writel(reg_val, mdata->base + SPI_TX_DATA_REG); + } } mtk_spi_enable_transfer(master); -- GitLab From 1bf455b2c67cf1af8830e8e855fe8b822a3a0554 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 20 Mar 2024 08:57:17 +0200 Subject: [PATCH 1179/2290] selftests: forwarding: Fix ping failure due to short timeout [ Upstream commit e4137851d4863a9bdc6aabc613bcb46c06d91e64 ] The tests send 100 pings in 0.1 second intervals and force a timeout of 11 seconds, which is borderline (especially on debug kernels), resulting in random failures in netdev CI [1]. Fix by increasing the timeout to 20 seconds. It should not prolong the test unless something is wrong, in which case the test will rightfully fail. [1] # selftests: net/forwarding: vxlan_bridge_1d_port_8472_ipv6.sh # INFO: Running tests with UDP port 8472 # TEST: ping: local->local [ OK ] # TEST: ping: local->remote 1 [FAIL] # Ping failed [...] Fixes: b07e9957f220 ("selftests: forwarding: Add VxLAN tests with a VLAN-unaware bridge for IPv6") Fixes: 728b35259e28 ("selftests: forwarding: Add VxLAN tests with a VLAN-aware bridge for IPv6") Reported-by: Paolo Abeni Closes: https://lore.kernel.org/netdev/24a7051fdcd1f156c3704bca39e4b3c41dfc7c4b.camel@redhat.com/ Signed-off-by: Ido Schimmel Reviewed-by: Hangbin Liu Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240320065717.4145325-1-idosch@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh | 4 ++-- .../testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh index ac97f07e5ce82..bd3f7d492af2b 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -354,7 +354,7 @@ __ping_ipv4() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome ARP noise. - PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 @@ -410,7 +410,7 @@ __ping_ipv6() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome neighbor discovery noise. - PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 100 diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh index d880df89bc8bd..e83fde79f40d0 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh @@ -457,7 +457,7 @@ __ping_ipv4() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome ARP noise. - PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 @@ -522,7 +522,7 @@ __ping_ipv6() # Send 100 packets and verify that at least 100 packets hit the rule, # to overcome neighbor discovery noise. - PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip check_err $? "Ping failed" tc_check_at_least_x_packets "dev $rp1 egress" 101 100 -- GitLab From d0980ed818d73788e4817a438aed8ddb56d98219 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 25 Jan 2023 23:31:55 +0100 Subject: [PATCH 1180/2290] dm: address indent/space issues [ Upstream commit 255e2646496fcbf836a3dfe1b535692f09f11b45 ] Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Stable-dep-of: b4d78cfeb304 ("dm-integrity: align the outgoing bio in integrity_recheck") Signed-off-by: Sasha Levin --- drivers/md/dm-cache-policy.h | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-integrity.c | 5 ++--- drivers/md/dm-log.c | 8 ++++---- drivers/md/dm-raid.c | 8 ++++---- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-table.c | 4 ++-- drivers/md/dm-thin.c | 6 +++--- drivers/md/dm-writecache.c | 2 +- drivers/md/persistent-data/dm-btree.c | 6 +++--- drivers/md/persistent-data/dm-space-map-common.c | 2 +- drivers/md/persistent-data/dm-space-map-common.h | 2 +- 12 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index 6ba3e9c91af53..8bc21d54884e9 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -75,7 +75,7 @@ struct dm_cache_policy { * background work. */ int (*get_background_work)(struct dm_cache_policy *p, bool idle, - struct policy_work **result); + struct policy_work **result); /* * You must pass in the same work pointer that you were given, not diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index e8c534b5870ac..25e51dc6e5598 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2535,7 +2535,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string type = &key_type_encrypted; set_key = set_key_encrypted; } else if (IS_ENABLED(CONFIG_TRUSTED_KEYS) && - !strncmp(key_string, "trusted:", key_desc - key_string + 1)) { + !strncmp(key_string, "trusted:", key_desc - key_string + 1)) { type = &key_type_trusted; set_key = set_key_trusted; } else { diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e1bf91faa462b..94382e43ea506 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2367,7 +2367,6 @@ offload_to_thread: else skip_check: dec_in_flight(dio); - } else { INIT_WORK(&dio->work, integrity_metadata); queue_work(ic->metadata_wq, &dio->work); @@ -4151,7 +4150,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { if (val < 1 << SECTOR_SHIFT || val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || - (val & (val -1))) { + (val & (val - 1))) { r = -EINVAL; ti->error = "Invalid block_size argument"; goto bad; @@ -4477,7 +4476,7 @@ try_smaller_buffer: if (ic->internal_hash) { size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); - if (!ic->recalc_wq ) { + if (!ic->recalc_wq) { ti->error = "Cannot allocate workqueue"; r = -ENOMEM; goto bad; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 05141eea18d3c..b7dd5a0cd58ba 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -756,8 +756,8 @@ static void core_set_region_sync(struct dm_dirty_log *log, region_t region, log_clear_bit(lc, lc->recovering_bits, region); if (in_sync) { log_set_bit(lc, lc->sync_bits, region); - lc->sync_count++; - } else if (log_test_bit(lc->sync_bits, region)) { + lc->sync_count++; + } else if (log_test_bit(lc->sync_bits, region)) { lc->sync_count--; log_clear_bit(lc, lc->sync_bits, region); } @@ -765,9 +765,9 @@ static void core_set_region_sync(struct dm_dirty_log *log, region_t region, static region_t core_get_sync_count(struct dm_dirty_log *log) { - struct log_c *lc = (struct log_c *) log->context; + struct log_c *lc = (struct log_c *) log->context; - return lc->sync_count; + return lc->sync_count; } #define DMEMIT_SYNC \ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 7fbce214e00f5..bf833ca880bc1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -362,8 +362,8 @@ static struct { const int mode; const char *param; } _raid456_journal_mode[] = { - { R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" }, - { R5C_JOURNAL_MODE_WRITE_BACK , "writeback" } + { R5C_JOURNAL_MODE_WRITE_THROUGH, "writethrough" }, + { R5C_JOURNAL_MODE_WRITE_BACK, "writeback" } }; /* Return MD raid4/5/6 journal mode for dm @journal_mode one */ @@ -1114,7 +1114,7 @@ too_many: * [stripe_cache ] Stripe cache size for higher RAIDs * [region_size ] Defines granularity of bitmap * [journal_dev ] raid4/5/6 journaling deviice - * (i.e. write hole closing log) + * (i.e. write hole closing log) * * RAID10-only options: * [raid10_copies <# copies>] Number of copies. (Default: 2) @@ -3999,7 +3999,7 @@ static int raid_preresume(struct dm_target *ti) } /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */ - if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) || (rs->requested_bitmap_chunk_sectors && mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index c38e63706d911..2327645fc0648 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -902,7 +902,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; kfree(ms); - return NULL; + return NULL; } ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e0367a672eabf..aabb2435070b8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -72,7 +72,7 @@ static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) n = get_child(n, CHILDREN_PER_NODE - 1); if (n >= t->counts[l]) - return (sector_t) - 1; + return (sector_t) -1; return get_node(t, l, n)[KEYS_PER_NODE - 1]; } @@ -1533,7 +1533,7 @@ static bool dm_table_any_dev_attr(struct dm_table *t, if (ti->type->iterate_devices && ti->type->iterate_devices(ti, func, data)) return true; - } + } return false; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 601f9e4e6234f..f24d89af7c5f0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1179,9 +1179,9 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO); discard_parent->bi_end_io = passdown_endio; discard_parent->bi_private = m; - if (m->maybe_shared) - passdown_double_checking_shared_status(m, discard_parent); - else { + if (m->maybe_shared) + passdown_double_checking_shared_status(m, discard_parent); + else { struct discard_op op; begin_discard(&op, tc, discard_parent); diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index c6ff43a8f0b25..a705e24d3e2b6 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) req.notify.context = &endio; /* writing via async dm-io (implied by notify.fn above) won't return an error */ - (void) dm_io(&req, 1, ®ion, NULL); + (void) dm_io(&req, 1, ®ion, NULL); i = j; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 1cc783d7030d8..18d949d63543b 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -726,7 +726,7 @@ static int shadow_child(struct dm_btree_info *info, struct dm_btree_value_type * * nodes, so saves metadata space. */ static int split_two_into_three(struct shadow_spine *s, unsigned int parent_index, - struct dm_btree_value_type *vt, uint64_t key) + struct dm_btree_value_type *vt, uint64_t key) { int r; unsigned int middle_index; @@ -781,7 +781,7 @@ static int split_two_into_three(struct shadow_spine *s, unsigned int parent_inde if (shadow_current(s) != right) unlock_block(s->info, right); - return r; + return r; } @@ -1216,7 +1216,7 @@ int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, static bool need_insert(struct btree_node *node, uint64_t *keys, unsigned int level, unsigned int index) { - return ((index >= le32_to_cpu(node->header.nr_entries)) || + return ((index >= le32_to_cpu(node->header.nr_entries)) || (le64_to_cpu(node->keys[index]) != keys[level])); } diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index af800efed9f3c..4833a3998c1d9 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -390,7 +390,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, } int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, - dm_block_t begin, dm_block_t end, dm_block_t *b) + dm_block_t begin, dm_block_t end, dm_block_t *b) { int r; uint32_t count; diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 706ceb85d6800..63d9a72e3265c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -120,7 +120,7 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, - dm_block_t begin, dm_block_t end, dm_block_t *result); + dm_block_t begin, dm_block_t end, dm_block_t *result); /* * The next three functions return (via nr_allocations) the net number of -- GitLab From 92b3c2437df8fe55a5c7816d9521b1fb7d0718b0 Mon Sep 17 00:00:00 2001 From: Hongyu Jin Date: Wed, 24 Jan 2024 13:35:53 +0800 Subject: [PATCH 1181/2290] dm io: Support IO priority [ Upstream commit 6e5f0f6383b4896c7e9b943d84b136149d0f45e9 ] Some IO will dispatch from kworker with different io_context settings than the submitting task, we may need to specify a priority to avoid losing priority. Add IO priority parameter to dm_io() and update all callers. Co-developed-by: Yibin Ding Signed-off-by: Yibin Ding Signed-off-by: Hongyu Jin Reviewed-by: Eric Biggers Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Stable-dep-of: b4d78cfeb304 ("dm-integrity: align the outgoing bio in integrity_recheck") Signed-off-by: Sasha Levin --- drivers/md/dm-bufio.c | 6 +++--- drivers/md/dm-integrity.c | 12 ++++++------ drivers/md/dm-io.c | 23 +++++++++++++---------- drivers/md/dm-kcopyd.c | 4 ++-- drivers/md/dm-log.c | 4 ++-- drivers/md/dm-raid1.c | 6 +++--- drivers/md/dm-snap-persistent.c | 4 ++-- drivers/md/dm-verity-target.c | 2 +- drivers/md/dm-writecache.c | 8 ++++---- include/linux/dm-io.h | 3 ++- 10 files changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 100a6a236d92a..ec662f97ba828 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -614,7 +614,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, io_req.mem.ptr.vma = (char *)b->data + offset; } - r = dm_io(&io_req, 1, ®ion, NULL); + r = dm_io(&io_req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) b->end_io(b, errno_to_blk_status(r)); } @@ -1375,7 +1375,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) BUG_ON(dm_bufio_in_request()); - return dm_io(&io_req, 1, &io_reg, NULL); + return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); @@ -1398,7 +1398,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c BUG_ON(dm_bufio_in_request()); - return dm_io(&io_req, 1, &io_reg, NULL); + return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 94382e43ea506..aff818eb31fbb 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -579,7 +579,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) } } - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) return r; @@ -1089,7 +1089,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, io_loc.sector = ic->start + SB_SECTORS + sector; io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? "reading journal" : "writing journal", r); @@ -1205,7 +1205,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, u io_loc.sector = target; io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { WARN_ONCE(1, "asynchronous dm_io failed: %d", r); fn(-1UL, data); @@ -1532,7 +1532,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat fr.io_reg.count = 0, fr.ic = ic; init_completion(&fr.comp); - r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT); BUG_ON(r); } @@ -1737,7 +1737,7 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks io_loc.sector = sector; io_loc.count = ic->sectors_per_block; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dio->bi_status = errno_to_blk_status(r); goto free_ret; @@ -2774,7 +2774,7 @@ next_chunk: io_loc.sector = get_data_sector(ic, area, offset); io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dm_integrity_io_error(ic, "reading data", r); goto err; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e488b05e35fa3..ec97658387c39 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -295,7 +295,7 @@ static void km_dp_init(struct dpages *dp, void *data) *---------------------------------------------------------------*/ static void do_region(const blk_opf_t opf, unsigned int region, struct dm_io_region *where, struct dpages *dp, - struct io *io) + struct io *io, unsigned short ioprio) { struct bio *bio; struct page *page; @@ -344,6 +344,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_end_io = endio; + bio->bi_ioprio = ioprio; store_io_and_region_in_bio(bio, io, region); if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { @@ -371,7 +372,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, static void dispatch_io(blk_opf_t opf, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, - struct io *io, int sync) + struct io *io, int sync, unsigned short ioprio) { int i; struct dpages old_pages = *dp; @@ -388,7 +389,7 @@ static void dispatch_io(blk_opf_t opf, unsigned int num_regions, for (i = 0; i < num_regions; i++) { *dp = old_pages; if (where[i].count || (opf & REQ_PREFLUSH)) - do_region(opf, i, where + i, dp, io); + do_region(opf, i, where + i, dp, io, ioprio); } /* @@ -413,7 +414,7 @@ static void sync_io_complete(unsigned long error, void *context) static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, - unsigned long *error_bits) + unsigned long *error_bits, unsigned short ioprio) { struct io *io; struct sync_io sio; @@ -435,7 +436,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(opf, num_regions, where, dp, io, 1); + dispatch_io(opf, num_regions, where, dp, io, 1, ioprio); wait_for_completion_io(&sio.wait); @@ -447,7 +448,8 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, static int async_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, blk_opf_t opf, - struct dpages *dp, io_notify_fn fn, void *context) + struct dpages *dp, io_notify_fn fn, void *context, + unsigned short ioprio) { struct io *io; @@ -467,7 +469,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(opf, num_regions, where, dp, io, 0); + dispatch_io(opf, num_regions, where, dp, io, 0, ioprio); return 0; } @@ -509,7 +511,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, } int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *where, unsigned long *sync_error_bits) + struct dm_io_region *where, unsigned long *sync_error_bits, + unsigned short ioprio) { int r; struct dpages dp; @@ -520,11 +523,11 @@ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_opf, &dp, sync_error_bits); + io_req->bi_opf, &dp, sync_error_bits, ioprio); return async_io(io_req->client, num_regions, where, io_req->bi_opf, &dp, io_req->notify.fn, - io_req->notify.context); + io_req->notify.context, ioprio); } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 0ef78e56aa88c..fda51bd140ed3 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -572,9 +572,9 @@ static int run_io_job(struct kcopyd_job *job) io_job_start(job->kc->throttle); if (job->op == REQ_OP_READ) - r = dm_io(&io_req, 1, &job->source, NULL); + r = dm_io(&io_req, 1, &job->source, NULL, IOPRIO_DEFAULT); else - r = dm_io(&io_req, job->num_dests, job->dests, NULL); + r = dm_io(&io_req, job->num_dests, job->dests, NULL, IOPRIO_DEFAULT); return r; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index b7dd5a0cd58ba..da77878cb2c02 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -295,7 +295,7 @@ static int rw_header(struct log_c *lc, enum req_op op) { lc->io_req.bi_opf = op; - return dm_io(&lc->io_req, 1, &lc->header_location, NULL); + return dm_io(&lc->io_req, 1, &lc->header_location, NULL, IOPRIO_DEFAULT); } static int flush_header(struct log_c *lc) @@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc) lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - return dm_io(&lc->io_req, 1, &null_location, NULL); + return dm_io(&lc->io_req, 1, &null_location, NULL, IOPRIO_DEFAULT); } static int read_header(struct log_c *log) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 2327645fc0648..1004199ae77ac 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -273,7 +273,7 @@ static int mirror_flush(struct dm_target *ti) } error_bits = -1; - dm_io(&io_req, ms->nr_mirrors, io, &error_bits); + dm_io(&io_req, ms->nr_mirrors, io, &error_bits, IOPRIO_DEFAULT); if (unlikely(error_bits != 0)) { for (i = 0; i < ms->nr_mirrors; i++) if (test_bit(i, &error_bits)) @@ -543,7 +543,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio) map_region(&io, m, bio); bio_set_m(bio, m); - BUG_ON(dm_io(&io_req, 1, &io, NULL)); + BUG_ON(dm_io(&io_req, 1, &io, NULL, IOPRIO_DEFAULT)); } static inline int region_in_sync(struct mirror_set *ms, region_t region, @@ -670,7 +670,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) */ bio_set_m(bio, get_default_mirror(ms)); - BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); + BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL, IOPRIO_DEFAULT)); } static void do_writes(struct mirror_set *ms, struct bio_list *writes) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 80b95746a43e0..eee1cd3aa3fcf 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -220,7 +220,7 @@ static void do_metadata(struct work_struct *work) { struct mdata_req *req = container_of(work, struct mdata_req, work); - req->result = dm_io(req->io_req, 1, req->where, NULL); + req->result = dm_io(req->io_req, 1, req->where, NULL, IOPRIO_DEFAULT); } /* @@ -244,7 +244,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, struct mdata_req req; if (!metadata) - return dm_io(&io_req, 1, &where, NULL); + return dm_io(&io_req, 1, &where, NULL, IOPRIO_DEFAULT); req.where = &where; req.io_req = &io_req; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b48e1b59e6da4..6a707b41dc865 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -503,7 +503,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, io_loc.bdev = v->data_dev->bdev; io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); - r = dm_io(&io_req, 1, &io_loc, NULL); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) goto free_ret; diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index a705e24d3e2b6..20fc84b24fc75 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) req.notify.context = &endio; /* writing via async dm-io (implied by notify.fn above) won't return an error */ - (void) dm_io(&req, 1, ®ion, NULL); + (void) dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); i = j; } @@ -568,7 +568,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) req.notify.fn = NULL; req.notify.context = NULL; - r = dm_io(&req, 1, ®ion, NULL); + r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) writecache_error(wc, r, "error writing superblock"); } @@ -596,7 +596,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) req.client = wc->dm_io; req.notify.fn = NULL; - r = dm_io(&req, 1, ®ion, NULL); + r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); if (unlikely(r)) writecache_error(wc, r, "error flushing metadata: %d", r); } @@ -984,7 +984,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors req.client = wc->dm_io; req.notify.fn = NULL; - return dm_io(&req, 1, ®ion, NULL); + return dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); } static void writecache_resume(struct dm_target *ti) diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 92e7abfe04f92..70b3737052dd2 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -79,7 +79,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ -- GitLab From aa587257e146a6d7450225c02d28d6c1320d173a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 21 Mar 2024 17:48:45 +0100 Subject: [PATCH 1182/2290] dm-integrity: align the outgoing bio in integrity_recheck [ Upstream commit b4d78cfeb30476239cf08f4f40afc095c173d6e3 ] It is possible to set up dm-integrity with smaller sector size than the logical sector size of the underlying device. In this situation, dm-integrity guarantees that the outgoing bios have the same alignment as incoming bios (so, if you create a filesystem with 4k block size, dm-integrity would send 4k-aligned bios to the underlying device). This guarantee was broken when integrity_recheck was implemented. integrity_recheck sends bio that is aligned to ic->sectors_per_block. So if we set up integrity with 512-byte sector size on a device with logical block size 4k, we would be sending unaligned bio. This triggered a bug in one of our internal tests. This commit fixes it by determining the actual alignment of the incoming bio and then makes sure that the outgoing bio in integrity_recheck has the same alignment. Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index aff818eb31fbb..9c9e2b50c63c3 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1709,7 +1709,6 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks struct bio_vec bv; sector_t sector, logical_sector, area, offset; struct page *page; - void *buffer; get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, @@ -1718,13 +1717,14 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks logical_sector = dio->range.logical_sector; page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); - buffer = page_to_virt(page); __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { unsigned pos = 0; do { + sector_t alignment; char *mem; + char *buffer = page_to_virt(page); int r; struct dm_io_request io_req; struct dm_io_region io_loc; @@ -1737,6 +1737,14 @@ static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checks io_loc.sector = sector; io_loc.count = ic->sectors_per_block; + /* Align the bio to logical block size */ + alignment = dio->range.logical_sector | bio_sectors(bio) | (PAGE_SIZE >> SECTOR_SHIFT); + alignment &= -alignment; + io_loc.sector = round_down(io_loc.sector, alignment); + io_loc.count += sector - io_loc.sector; + buffer += (sector - io_loc.sector) << SECTOR_SHIFT; + io_loc.count = round_up(io_loc.count, alignment); + r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); if (unlikely(r)) { dio->bi_status = errno_to_blk_status(r); -- GitLab From f07ffd18d787b0227f95d94f678232a30c0e27c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Mar 2024 16:26:16 +0100 Subject: [PATCH 1183/2290] x86/efistub: Clear decompressor BSS in native EFI entrypoint [ Upstream commit b3810c5a2cc4a6665f7a65bed5393c75ce3f3aa2 ] The EFI stub on x86 no longer invokes the decompressor as a subsequent boot stage, but calls into the decompression code directly while running in the context of the EFI boot services. This means that when using the native EFI entrypoint (as opposed to the EFI handover protocol, which clears BSS explicitly), the firmware PE image loader is being relied upon to ensure that BSS is zeroed before the EFI stub is entered from the firmware. As Radek's report proves, this is a bad idea. Not all loaders do this correctly, which means some global variables that should be statically initialized to 0x0 may have junk in them. So clear BSS explicitly when entering via efi_pe_entry(). Note that zeroing BSS from C code is not generally safe, but in this case, the following assignment and dereference of a global pointer variable ensures that the memset() cannot be deferred or reordered. Cc: # v6.1+ Reported-by: Radek Podgorny Closes: https://lore.kernel.org/all/a99a831a-8ad5-4cb0-bff9-be637311f771@podgorny.cz Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/x86-stub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 784e1b2ae5ccd..aa07051459f52 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -21,6 +21,8 @@ #include "efistub.h" #include "x86-stub.h" +extern char _bss[], _ebss[]; + const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; static efi_loaded_image_t *image = NULL; @@ -432,6 +434,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_status_t status; char *cmdline_ptr; + memset(_bss, 0, _ebss - _bss); + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ @@ -950,8 +954,6 @@ fail: void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, struct boot_params *boot_params) { - extern char _bss[], _ebss[]; - memset(_bss, 0, _ebss - _bss); efi_stub_entry(handle, sys_table_arg, boot_params); } -- GitLab From 4b0c54792394ab3d8b1df29a069844fc277b8f14 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 22 Mar 2024 17:01:45 +0100 Subject: [PATCH 1184/2290] x86/efistub: Don't clear BSS twice in mixed mode [ Upstream commit df7ecce842b846a04d087ba85fdb79a90e26a1b0 ] Clearing BSS should only be done once, at the very beginning. efi_pe_entry() is the entrypoint from the firmware, which may not clear BSS and so it is done explicitly. However, efi_pe_entry() is also used as an entrypoint by the mixed mode startup code, in which case BSS will already have been cleared, and doing it again at this point will corrupt global variables holding the firmware's GDT/IDT and segment selectors. So make the memset() conditional on whether the EFI stub is running in native mode. Fixes: b3810c5a2cc4a666 ("x86/efistub: Clear decompressor BSS in native EFI entrypoint") Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/x86-stub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index aa07051459f52..dc50dda40239e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -434,7 +434,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_status_t status; char *cmdline_ptr; - memset(_bss, 0, _ebss - _bss); + if (efi_is_native()) + memset(_bss, 0, _ebss - _bss); efi_system_table = sys_table_arg; -- GitLab From 9b2e8276bfb6bdb26becbbcc509b3a7b93600384 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Jul 2023 21:56:49 +0200 Subject: [PATCH 1185/2290] remoteproc: stm32: fix incorrect optional pointers commit fb2bdd32b231b70e6a3f1054528692f604db25d8 upstream. Compile-testing without CONFIG_OF shows that the of_match_ptr() macro was used incorrectly here: drivers/remoteproc/stm32_rproc.c:662:34: warning: unused variable 'stm32_rproc_match' [-Wunused-const-variable] As in almost every driver, the solution is simply to remove the use of this macro. The same thing happened with the deprecated SIMPLE_DEV_PM_OPS(), but the corresponding warning was already shut up with __maybe_unused annotations, so fix those as well by using the correct DEFINE_SIMPLE_DEV_PM_OPS() macros and removing the extraneous __maybe_unused modifiers. For completeness, also add a pm_ptr() to let the PM ops be eliminated completely when CONFIG_PM is turned off. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202307242300.ia82qBTp-lkp@intel.com Fixes: 03bd158e1535 ("remoteproc: stm32: use correct format strings on 64-bit") Fixes: 410119ee29b6 ("remoteproc: stm32: wakeup the system by wdg irq") Fixes: 13140de09cc2 ("remoteproc: stm32: add an ST stm32_rproc driver") Signed-off-by: Arnd Bergmann Acked-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20230724195704.2432382-1-arnd@kernel.org Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/stm32_rproc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 385e931603ad3..74da0393172c5 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -889,7 +889,7 @@ static int stm32_rproc_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused stm32_rproc_suspend(struct device *dev) +static int stm32_rproc_suspend(struct device *dev) { struct rproc *rproc = dev_get_drvdata(dev); struct stm32_rproc *ddata = rproc->priv; @@ -900,7 +900,7 @@ static int __maybe_unused stm32_rproc_suspend(struct device *dev) return 0; } -static int __maybe_unused stm32_rproc_resume(struct device *dev) +static int stm32_rproc_resume(struct device *dev) { struct rproc *rproc = dev_get_drvdata(dev); struct stm32_rproc *ddata = rproc->priv; @@ -911,16 +911,16 @@ static int __maybe_unused stm32_rproc_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops, - stm32_rproc_suspend, stm32_rproc_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops, + stm32_rproc_suspend, stm32_rproc_resume); static struct platform_driver stm32_rproc_driver = { .probe = stm32_rproc_probe, .remove = stm32_rproc_remove, .driver = { .name = "stm32-rproc", - .pm = &stm32_rproc_pm_ops, - .of_match_table = of_match_ptr(stm32_rproc_match), + .pm = pm_ptr(&stm32_rproc_pm_ops), + .of_match_table = stm32_rproc_match, }, }; module_platform_driver(stm32_rproc_driver); -- GitLab From e5cd595e23c1a075359a337c0e5c3a4f2dc28dd1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 24 Mar 2024 14:37:56 -0400 Subject: [PATCH 1186/2290] Linux 6.1.83 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Shuah Khan Tested-by: kernelci.org bot Tested-by: Mateusz Jończyk Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5345f3ebed0d..38657b3dda2cd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 82 +SUBLEVEL = 83 EXTRAVERSION = NAME = Curry Ramen -- GitLab From f1ee75aa66c79976d73179f22b311a17611888db Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 24 Jan 2023 10:33:18 -0600 Subject: [PATCH 1187/2290] x86/cpu: Support AMD Automatic IBRS commit e7862eda309ecfccc36bb5558d937ed3ace07f3f upstream. The AMD Zen4 core supports a new feature called Automatic IBRS. It is a "set-and-forget" feature that means that, like Intel's Enhanced IBRS, h/w manages its IBRS mitigation resources automatically across CPL transitions. The feature is advertised by CPUID_Fn80000021_EAX bit 8 and is enabled by setting MSR C000_0080 (EFER) bit 21. Enable Automatic IBRS by default if the CPU feature is present. It typically provides greater performance over the incumbent generic retpolines mitigation. Reuse the SPECTRE_V2_EIBRS spectre_v2_mitigation enum. AMD Automatic IBRS and Intel Enhanced IBRS have similar enablement. Add NO_EIBRS_PBRSB to cpu_vuln_whitelist, since AMD Automatic IBRS isn't affected by PBRSB-eIBRS. The kernel command line option spectre_v2=eibrs is used to select AMD Automatic IBRS, if available. Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Acked-by: Sean Christopherson Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20230124163319.2277355-8-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 6 +++--- .../admin-guide/kernel-parameters.txt | 6 +++--- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 20 +++++++++++-------- arch/x86/kernel/cpu/common.c | 19 ++++++++++-------- 6 files changed, 32 insertions(+), 22 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index a39bbfe9526b6..4d186f599d90f 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -621,9 +621,9 @@ kernel command line. retpoline,generic Retpolines retpoline,lfence LFENCE; indirect branch retpoline,amd alias for retpoline,lfence - eibrs enhanced IBRS - eibrs,retpoline enhanced IBRS + Retpolines - eibrs,lfence enhanced IBRS + LFENCE + eibrs Enhanced/Auto IBRS + eibrs,retpoline Enhanced/Auto IBRS + Retpolines + eibrs,lfence Enhanced/Auto IBRS + LFENCE ibrs use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2dfe75104e7de..f7dfdd3d021ea 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5765,9 +5765,9 @@ retpoline,generic - Retpolines retpoline,lfence - LFENCE; indirect branch retpoline,amd - alias for retpoline,lfence - eibrs - enhanced IBRS - eibrs,retpoline - enhanced IBRS + Retpolines - eibrs,lfence - enhanced IBRS + LFENCE + eibrs - Enhanced/Auto IBRS + eibrs,retpoline - Enhanced/Auto IBRS + Retpolines + eibrs,lfence - Enhanced/Auto IBRS + LFENCE ibrs - use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b97a70aa4de90..9a157942ae3dd 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -427,6 +427,7 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 005e41dc7ee5a..33a19ef23644d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -30,6 +30,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -38,6 +39,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* Intel MSRs. Some also available on other CPUs */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c68789fdc123b..e7074a1b6ba78 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1396,9 +1396,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", - [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", - [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines", [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; @@ -1467,7 +1467,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -1652,8 +1652,12 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - update_spec_ctrl(x86_spec_ctrl_base); + if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) { + msr_set_bit(MSR_EFER, _EFER_AUTOIBRS); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + update_spec_ctrl(x86_spec_ctrl_base); + } } switch (mode) { @@ -1737,8 +1741,8 @@ static void __init spectre_v2_select_mitigation(void) /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around - * firmware calls only when IBRS / Enhanced IBRS aren't otherwise - * enabled. + * firmware calls only when IBRS / Enhanced / Automatic IBRS aren't + * otherwise enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 758938c94b41e..ca243d7ba0ea5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1212,8 +1212,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* Zhaoxin Family 7 */ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), @@ -1362,8 +1362,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - if (ia32_cap & ARCH_CAP_IBRS_ALL) + /* + * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature + * flag and protect from vendor-specific bugs via the whitelist. + */ + if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { @@ -1425,11 +1433,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_RETBLEED); } - if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && - !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) - setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); - if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) setup_force_cpu_bug(X86_BUG_SMT_RSB); -- GitLab From 3769db86adec4dd3d88446792da1f9ca5d964e34 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 9 Aug 2022 17:32:02 +0200 Subject: [PATCH 1188/2290] x86/bugs: Use sysfs_emit() commit 1d30800c0c0ae1d086ffad2bdf0ba4403370f132 upstream. Those mitigations are very talkative; use the printing helper which pays attention to the buffer size. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220809153419.10182-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 103 ++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7074a1b6ba78..750fb4fc2ac6a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2572,74 +2572,74 @@ static const char * const l1tf_vmx_states[] = { static ssize_t l1tf_show_state(char *buf) { if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && sched_smt_active())) { - return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation]); + return sysfs_emit(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); } - return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t itlb_multihit_show_state(char *buf) { if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || !boot_cpu_has(X86_FEATURE_VMX)) - return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX unsupported\n"); else if (!(cr4_read_shadow() & X86_CR4_VMXE)) - return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX disabled\n"); else if (itlb_multihit_kvm_mitigation) - return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + return sysfs_emit(buf, "KVM: Mitigation: Split huge pages\n"); else - return sprintf(buf, "KVM: Vulnerable\n"); + return sysfs_emit(buf, "KVM: Vulnerable\n"); } #else static ssize_t l1tf_show_state(char *buf) { - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); } static ssize_t itlb_multihit_show_state(char *buf) { - return sprintf(buf, "Processor vulnerable\n"); + return sysfs_emit(buf, "Processor vulnerable\n"); } #endif static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - mds_strings[mds_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); } if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : - sched_smt_active() ? "mitigated" : "disabled")); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t tsx_async_abort_show_state(char *buf) { if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || (taa_mitigation == TAA_MITIGATION_OFF)) - return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s\n", taa_strings[taa_mitigation]); if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); } - return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t mmio_stale_data_show_state(char *buf) @@ -2712,47 +2712,46 @@ static char *pbrsb_eibrs_state(void) static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) - return sprintf(buf, "Vulnerable: LFENCE\n"); + return sysfs_emit(buf, "Vulnerable: LFENCE\n"); if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) - return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); if (sched_smt_active() && unprivileged_ebpf_enabled() && spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s%s\n", - spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - pbrsb_eibrs_state(), - spectre_v2_module_string()); + return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), + spectre_v2_module_string()); } static ssize_t srbds_show_state(char *buf) { - return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + return sysfs_emit(buf, "%s\n", srbds_strings[srbds_mitigation]); } static ssize_t retbleed_show_state(char *buf) { if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET || retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sysfs_emit(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); - return sprintf(buf, "%s; SMT %s\n", - retbleed_strings[retbleed_mitigation], - !sched_smt_active() ? "disabled" : - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? - "enabled with STIBP protection" : "vulnerable"); + return sysfs_emit(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); } - return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } static ssize_t gds_show_state(char *buf) @@ -2774,26 +2773,26 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); switch (bug) { case X86_BUG_CPU_MELTDOWN: if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + return sysfs_emit(buf, "Mitigation: PTI\n"); if (hypervisor_is_type(X86_HYPER_XEN_PV)) - return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); + return sysfs_emit(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); break; case X86_BUG_SPECTRE_V1: - return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); + return sysfs_emit(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: - return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + return sysfs_emit(buf, "%s\n", ssb_strings[ssb_mode]); case X86_BUG_L1TF: if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) @@ -2832,7 +2831,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr break; } - return sprintf(buf, "Vulnerable\n"); + return sysfs_emit(buf, "Vulnerable\n"); } ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -- GitLab From 8d70aa08326c559638353dd68b1d9bb35e4fd867 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Nov 2022 20:58:39 +0800 Subject: [PATCH 1189/2290] KVM: x86: Update KVM-only leaf handling to allow for 100% KVM-only leafs commit 047c7229906152fb85c23dc18fd25a00cd7cb4de upstream. Rename kvm_cpu_cap_init_scattered() to kvm_cpu_cap_init_kvm_defined() in anticipation of adding KVM-only CPUID leafs that aren't recognized by the kernel and thus not scattered, i.e. for leafs that are 100% KVM-defined. Adjust/add comments to kvm_only_cpuid_leafs and KVM_X86_FEATURE to document how to create new kvm_only_cpuid_leafs entries for scattered features as well as features that are entirely unknown to the kernel. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20221125125845.1182922-3-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 ++++---- arch/x86/kvm/reverse_cpuid.h | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c3ef1fc602bf9..1180938917ce3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -535,9 +535,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) } static __always_inline -void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -547,7 +547,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ BUILD_BUG_ON(leaf >= NCAPINTS); kvm_cpu_caps[leaf] &= mask; @@ -656,7 +656,7 @@ void kvm_set_cpu_caps(void) F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); - kvm_cpu_cap_init_scattered(CPUID_12_EAX, + kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, SF(SGX1) | SF(SGX2) ); diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 7eeade35a425b..9b73fb8b87d99 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,9 +7,9 @@ #include /* - * Hardware-defined CPUID leafs that are scattered in the kernel, but need to - * be directly used by KVM. Note, these word values conflict with the kernel's - * "bug" caps, but KVM doesn't use those. + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, @@ -18,6 +18,18 @@ enum kvm_only_cpuid_leafs { NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +/* + * Define a KVM-only feature flag. + * + * For features that are scattered by cpufeatures.h, __feature_translate() also + * needs to be updated to translate the kernel-defined feature into the + * KVM-defined feature. + * + * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h, + * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so + * that X86_FEATURE_* can be used in KVM. No __feature_translate() handling is + * needed in this case. + */ #define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ -- GitLab From b6aa21725fbe6ab9f0f1c08d97b77b31d71c5066 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 23 Oct 2023 17:16:35 -0700 Subject: [PATCH 1190/2290] KVM: x86: Advertise CPUID.(EAX=7,ECX=2):EDX[5:0] to userspace commit eefe5e6682099445f77f2d97d4c525f9ac9d9b07 upstream. The low five bits {INTEL_PSFD, IPRED_CTRL, RRSBA_CTRL, DDPD_U, BHI_CTRL} advertise the availability of specific bits in IA32_SPEC_CTRL. Since KVM dynamically determines the legal IA32_SPEC_CTRL bits for the underlying hardware, the hard work has already been done. Just let userspace know that a guest can use these IA32_SPEC_CTRL bits. The sixth bit (MCDT_NO) states that the processor does not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior. This is an inherent property of the physical processor that is inherited by the virtual CPU. Pass that information on to userspace. Signed-off-by: Jim Mattson Reviewed-by: Chao Gao Link: https://lore.kernel.org/r/20231024001636.890236-1-jmattson@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++--- arch/x86/kvm/reverse_cpuid.h | 12 ++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1180938917ce3..62a44455c51d0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -652,6 +652,11 @@ void kvm_set_cpu_caps(void) F(AVX_VNNI) | F(AVX512_BF16) ); + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, + F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | + F(BHI_CTRL) | F(MCDT_NO) + ); + kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); @@ -902,13 +907,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* function 7 has additional index. */ case 7: - entry->eax = min(entry->eax, 1u); + max_idx = entry->eax = min(entry->eax, 2u); cpuid_entry_override(entry, CPUID_7_0_EBX); cpuid_entry_override(entry, CPUID_7_ECX); cpuid_entry_override(entry, CPUID_7_EDX); - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ - if (entry->eax == 1) { + /* KVM only supports up to 0x7.2, capped above via min(). */ + if (max_idx >= 1) { entry = do_host_cpuid(array, function, 1); if (!entry) goto out; @@ -918,6 +923,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = 0; entry->edx = 0; } + if (max_idx >= 2) { + entry = do_host_cpuid(array, function, 2); + if (!entry) + goto out; + + cpuid_entry_override(entry, CPUID_7_2_EDX); + entry->ecx = 0; + entry->ebx = 0; + entry->eax = 0; + } break; case 0xa: { /* Architectural Performance Monitoring */ union cpuid10_eax eax; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 9b73fb8b87d99..d50b653cb8d09 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -13,6 +13,7 @@ */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_2_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -36,6 +37,14 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ +#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) +#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) +#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) +#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) + struct cpuid_reg { u32 function; u32 index; @@ -61,6 +70,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, }; /* @@ -91,6 +101,8 @@ static __always_inline u32 __feature_translate(int x86_feature) return KVM_X86_FEATURE_SGX1; else if (x86_feature == X86_FEATURE_SGX2) return KVM_X86_FEATURE_SGX2; + else if (x86_feature == X86_FEATURE_RRSBA_CTRL) + return KVM_X86_FEATURE_RRSBA_CTRL; return x86_feature; } -- GitLab From 981cf0cab48a11fd2011895213f44e2460da03bb Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 23 Oct 2023 17:16:36 -0700 Subject: [PATCH 1191/2290] KVM: x86: Use a switch statement and macros in __feature_translate() commit 80c883db87d9ffe2d685e91ba07a087b1c246c78 upstream. Use a switch statement with macro-generated case statements to handle translating feature flags in order to reduce the probability of runtime errors due to copy+paste goofs, to make compile-time errors easier to debug, and to make the code more readable. E.g. the compiler won't directly generate an error for duplicate if statements if (x86_feature == X86_FEATURE_SGX1) return KVM_X86_FEATURE_SGX1; else if (x86_feature == X86_FEATURE_SGX2) return KVM_X86_FEATURE_SGX1; and so instead reverse_cpuid_check() will fail due to the untranslated entry pointing at a Linux-defined leaf, which provides practically no hint as to what is broken arch/x86/kvm/reverse_cpuid.h:108:2: error: call to __compiletime_assert_450 declared with 'error' attribute: BUILD_BUG_ON failed: x86_leaf == CPUID_LNX_4 BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); ^ whereas duplicate case statements very explicitly point at the offending code: arch/x86/kvm/reverse_cpuid.h:125:2: error: duplicate case value '361' KVM_X86_TRANSLATE_FEATURE(SGX2); ^ arch/x86/kvm/reverse_cpuid.h:124:2: error: duplicate case value '360' KVM_X86_TRANSLATE_FEATURE(SGX1); ^ And without macros, the opposite type of copy+paste goof doesn't generate any error at compile-time, e.g. this yields no complaints: case X86_FEATURE_SGX1: return KVM_X86_FEATURE_SGX1; case X86_FEATURE_SGX2: return KVM_X86_FEATURE_SGX1; Note, __feature_translate() is forcibly inlined and the feature is known at compile-time, so the code generation between an if-elif sequence and a switch statement should be identical. Signed-off-by: Jim Mattson Link: https://lore.kernel.org/r/20231024001636.890236-2-jmattson@google.com [sean: use a macro, rewrite changelog] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/reverse_cpuid.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index d50b653cb8d09..7c8e2b20a13b0 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -97,14 +97,16 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) */ static __always_inline u32 __feature_translate(int x86_feature) { - if (x86_feature == X86_FEATURE_SGX1) - return KVM_X86_FEATURE_SGX1; - else if (x86_feature == X86_FEATURE_SGX2) - return KVM_X86_FEATURE_SGX2; - else if (x86_feature == X86_FEATURE_RRSBA_CTRL) - return KVM_X86_FEATURE_RRSBA_CTRL; - - return x86_feature; +#define KVM_X86_TRANSLATE_FEATURE(f) \ + case X86_FEATURE_##f: return KVM_X86_FEATURE_##f + + switch (x86_feature) { + KVM_X86_TRANSLATE_FEATURE(SGX1); + KVM_X86_TRANSLATE_FEATURE(SGX2); + KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + default: + return x86_feature; + } } static __always_inline u32 __feature_leaf(int x86_feature) -- GitLab From a50fd9871257f2cd78e367ad0c1140cb86c4366e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:40 +0100 Subject: [PATCH 1192/2290] timers: Update kernel-doc for various functions [ Upstream commit 14f043f1340bf30bc60af127bff39f55889fef26 ] The kernel-doc of timer related functions is partially uncomprehensible word salad. Rewrite it to make it useful. Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20221123201624.828703870@linutronix.de Stable-dep-of: 0f7352557a35 ("wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach") Signed-off-by: Sasha Levin --- kernel/time/timer.c | 148 +++++++++++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 58 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 717fcb9fb14aa..ab9688a2ae190 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1121,14 +1121,16 @@ out_unlock: } /** - * mod_timer_pending - modify a pending timer's timeout - * @timer: the pending timer to be modified - * @expires: new timeout in jiffies + * mod_timer_pending - Modify a pending timer's timeout + * @timer: The pending timer to be modified + * @expires: New absolute timeout in jiffies * - * mod_timer_pending() is the same for pending timers as mod_timer(), - * but will not re-activate and modify already deleted timers. + * mod_timer_pending() is the same for pending timers as mod_timer(), but + * will not activate inactive timers. * - * It is useful for unserialized use of timers. + * Return: + * * %0 - The timer was inactive and not modified + * * %1 - The timer was active and requeued to expire at @expires */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { @@ -1137,24 +1139,27 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(mod_timer_pending); /** - * mod_timer - modify a timer's timeout - * @timer: the timer to be modified - * @expires: new timeout in jiffies - * - * mod_timer() is a more efficient way to update the expire field of an - * active timer (if the timer is inactive it will be activated) + * mod_timer - Modify a timer's timeout + * @timer: The timer to be modified + * @expires: New absolute timeout in jiffies * * mod_timer(timer, expires) is equivalent to: * * del_timer(timer); timer->expires = expires; add_timer(timer); * + * mod_timer() is more efficient than the above open coded sequence. In + * case that the timer is inactive, the del_timer() part is a NOP. The + * timer is in any case activated with the new expiry time @expires. + * * Note that if there are multiple unserialized concurrent users of the * same timer, then mod_timer() is the only safe way to modify the timeout, * since add_timer() cannot modify an already running timer. * - * The function returns whether it has modified a pending timer or not. - * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an - * active timer returns 1.) + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires did + * not change the effective expiry time */ int mod_timer(struct timer_list *timer, unsigned long expires) { @@ -1165,11 +1170,18 @@ EXPORT_SYMBOL(mod_timer); /** * timer_reduce - Modify a timer's timeout if it would reduce the timeout * @timer: The timer to be modified - * @expires: New timeout in jiffies + * @expires: New absolute timeout in jiffies * * timer_reduce() is very similar to mod_timer(), except that it will only - * modify a running timer if that would reduce the expiration time (it will - * start a timer that isn't running). + * modify an enqueued timer if that would reduce the expiration time. If + * @timer is not enqueued it starts the timer. + * + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires + * did not change the effective expiry time such that the + * timer would expire earlier than already scheduled */ int timer_reduce(struct timer_list *timer, unsigned long expires) { @@ -1178,18 +1190,21 @@ int timer_reduce(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(timer_reduce); /** - * add_timer - start a timer - * @timer: the timer to be added + * add_timer - Start a timer + * @timer: The timer to be started * - * The kernel will do a ->function(@timer) callback from the - * timer interrupt at the ->expires point in the future. The - * current time is 'jiffies'. + * Start @timer to expire at @timer->expires in the future. @timer->expires + * is the absolute expiry time measured in 'jiffies'. When the timer expires + * timer->function(timer) will be invoked from soft interrupt context. * - * The timer's ->expires, ->function fields must be set prior calling this - * function. + * The @timer->expires and @timer->function fields must be set prior + * to calling this function. + * + * If @timer->expires is already in the past @timer will be queued to + * expire at the next timer tick. * - * Timers with an ->expires field in the past will be executed in the next - * timer tick. + * This can only operate on an inactive timer. Attempts to invoke this on + * an active timer are rejected with a warning. */ void add_timer(struct timer_list *timer) { @@ -1199,11 +1214,13 @@ void add_timer(struct timer_list *timer) EXPORT_SYMBOL(add_timer); /** - * add_timer_on - start a timer on a particular CPU - * @timer: the timer to be added - * @cpu: the CPU to start it on + * add_timer_on - Start a timer on a particular CPU + * @timer: The timer to be started + * @cpu: The CPU to start it on + * + * Same as add_timer() except that it starts the timer on the given CPU. * - * This is not very scalable on SMP. Double adds are not possible. + * See add_timer() for further details. */ void add_timer_on(struct timer_list *timer, int cpu) { @@ -1238,15 +1255,18 @@ void add_timer_on(struct timer_list *timer, int cpu) EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - deactivate a timer. - * @timer: the timer to be deactivated - * - * del_timer() deactivates a timer - this works on both active and inactive - * timers. - * - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) + * del_timer - Deactivate a timer. + * @timer: The timer to be deactivated + * + * The function only deactivates a pending timer, but contrary to + * del_timer_sync() it does not take into account whether the timer's + * callback function is concurrently executed on a different CPU or not. + * It neither prevents rearming of the timer. If @timer can be rearmed + * concurrently then the return value of this function is meaningless. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ int del_timer(struct timer_list *timer) { @@ -1268,10 +1288,19 @@ EXPORT_SYMBOL(del_timer); /** * try_to_del_timer_sync - Try to deactivate a timer - * @timer: timer to delete + * @timer: Timer to deactivate + * + * This function tries to deactivate a timer. On success the timer is not + * queued and the timer callback function is not running on any CPU. * - * This function tries to deactivate a timer. Upon successful (ret >= 0) - * exit the timer is not queued and the handler is not running on any CPU. + * This function does not guarantee that the timer cannot be rearmed right + * after dropping the base lock. That needs to be prevented by the calling + * code if necessary. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated + * * %-1 - The timer callback function is running on a different CPU */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -1367,23 +1396,19 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) /** - * del_timer_sync - deactivate a timer and wait for the handler to finish. - * @timer: the timer to be deactivated - * - * This function only differs from del_timer() on SMP: besides deactivating - * the timer it also makes sure the handler has finished executing on other - * CPUs. + * del_timer_sync - Deactivate a timer and wait for the handler to finish. + * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts unless the timer is an irqsafe one. The caller must - * not hold locks which would prevent completion of the timer's - * handler. The timer's handler must not call add_timer_on(). Upon exit the - * timer is not queued and the handler is not running on any CPU. + * not hold locks which would prevent completion of the timer's callback + * function. The timer's handler must not call add_timer_on(). Upon exit + * the timer is not queued and the handler is not running on any CPU. * - * Note: For !irqsafe timers, you must not hold locks that are held in - * interrupt context while calling this function. Even if the lock has - * nothing to do with the timer in question. Here's why:: + * For !irqsafe timers, the caller must not hold locks that are held in + * interrupt context. Even if the lock has nothing to do with the timer in + * question. Here's why:: * * CPU0 CPU1 * ---- ---- @@ -1397,10 +1422,17 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * while (base->running_timer == mytimer); * * Now del_timer_sync() will never return and never release somelock. - * The interrupt on the other CPU is waiting to grab somelock but - * it has interrupted the softirq that CPU0 is waiting to finish. + * The interrupt on the other CPU is waiting to grab somelock but it has + * interrupted the softirq that CPU0 is waiting to finish. + * + * This function cannot guarantee that the timer is not rearmed again by + * some concurrent or preempting code, right after it dropped the base + * lock. If there is the possibility of a concurrent rearm then the return + * value of the function is meaningless. * - * The function returns whether it has deactivated a pending timer or not. + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ int del_timer_sync(struct timer_list *timer) { -- GitLab From 2957037c1d94650d19c2bd176373def0c43c6d6d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:42 +0100 Subject: [PATCH 1193/2290] timers: Use del_timer_sync() even on UP [ Upstream commit 168f6b6ffbeec0b9333f3582e4cf637300858db5 ] del_timer_sync() is assumed to be pointless on uniprocessor systems and can be mapped to del_timer() because in theory del_timer() can never be invoked while the timer callback function is executed. This is not entirely true because del_timer() can be invoked from interrupt context and therefore hit in the middle of a running timer callback. Contrary to that del_timer_sync() is not allowed to be invoked from interrupt context unless the affected timer is marked with TIMER_IRQSAFE. del_timer_sync() has proper checks in place to detect such a situation. Give up on the UP optimization and make del_timer_sync() unconditionally available. Co-developed-by: Steven Rostedt Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/all/20220407161745.7d6754b3@gandalf.local.home Link: https://lore.kernel.org/all/20221110064101.429013735@goodmis.org Link: https://lore.kernel.org/r/20221123201624.888306160@linutronix.de Stable-dep-of: 0f7352557a35 ("wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach") Signed-off-by: Sasha Levin --- include/linux/timer.h | 7 +------ kernel/time/timer.c | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 648f00105f588..82bb2e4d3b7c2 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,12 +183,7 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); - -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - extern int del_timer_sync(struct timer_list *timer); -#else -# define del_timer_sync(t) del_timer(t) -#endif +extern int del_timer_sync(struct timer_list *timer); #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ab9688a2ae190..9d09a2a0ad708 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1394,7 +1394,6 @@ static inline void timer_sync_wait_running(struct timer_base *base) { } static inline void del_timer_wait_running(struct timer_list *timer) { } #endif -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) /** * del_timer_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated @@ -1475,7 +1474,6 @@ int del_timer_sync(struct timer_list *timer) return ret; } EXPORT_SYMBOL(del_timer_sync); -#endif static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), -- GitLab From 113d5341ee1299aff7a6252cb19af52160ba22e8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:44 +0100 Subject: [PATCH 1194/2290] timers: Rename del_timer_sync() to timer_delete_sync() [ Upstream commit 9b13df3fb64ee95e2397585404e442afee2c7d4f ] The timer related functions do not have a strict timer_ prefixed namespace which is really annoying. Rename del_timer_sync() to timer_delete_sync() and provide del_timer_sync() as a wrapper. Document that del_timer_sync() is not for new code. Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Steven Rostedt (Google) Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20221123201624.954785441@linutronix.de Stable-dep-of: 0f7352557a35 ("wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach") Signed-off-by: Sasha Levin --- include/linux/timer.h | 15 ++++++++++++++- kernel/time/timer.c | 18 +++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 82bb2e4d3b7c2..6d18f04ad7039 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,7 +183,20 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -extern int del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync(struct timer_list *timer); + +/** + * del_timer_sync - Delete a pending timer and wait for a running callback + * @timer: The timer to be deleted + * + * See timer_delete_sync() for detailed explanation. + * + * Do not use in new code. Use timer_delete_sync() instead. + */ +static inline int del_timer_sync(struct timer_list *timer) +{ + return timer_delete_sync(timer); +} #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9d09a2a0ad708..59469897432bc 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1083,7 +1083,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, - * otherwise del_timer_sync() can't detect that the timer's + * otherwise timer_delete_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. */ @@ -1259,7 +1259,7 @@ EXPORT_SYMBOL_GPL(add_timer_on); * @timer: The timer to be deactivated * * The function only deactivates a pending timer, but contrary to - * del_timer_sync() it does not take into account whether the timer's + * timer_delete_sync() it does not take into account whether the timer's * callback function is concurrently executed on a different CPU or not. * It neither prevents rearming of the timer. If @timer can be rearmed * concurrently then the return value of this function is meaningless. @@ -1395,7 +1395,7 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } #endif /** - * del_timer_sync - Deactivate a timer and wait for the handler to finish. + * timer_delete_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, @@ -1417,10 +1417,10 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * spin_lock_irq(somelock); * * spin_lock(somelock); - * del_timer_sync(mytimer); + * timer_delete_sync(mytimer); * while (base->running_timer == mytimer); * - * Now del_timer_sync() will never return and never release somelock. + * Now timer_delete_sync() will never return and never release somelock. * The interrupt on the other CPU is waiting to grab somelock but it has * interrupted the softirq that CPU0 is waiting to finish. * @@ -1433,7 +1433,7 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ -int del_timer_sync(struct timer_list *timer) +int timer_delete_sync(struct timer_list *timer) { int ret; @@ -1473,7 +1473,7 @@ int del_timer_sync(struct timer_list *timer) return ret; } -EXPORT_SYMBOL(del_timer_sync); +EXPORT_SYMBOL(timer_delete_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), @@ -1495,8 +1495,8 @@ static void call_timer_fn(struct timer_list *timer, #endif /* * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map around the fn() - * call here and in del_timer_sync(). + * timer_delete_sync() by acquiring the lock_map around the fn() + * call here and in timer_delete_sync(). */ lock_map_acquire(&lockdep_map); -- GitLab From 0b812f706fd7090be74812101114a0e165b36744 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sun, 7 Jan 2024 08:25:04 +0100 Subject: [PATCH 1195/2290] wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach [ Upstream commit 0f7352557a35ab7888bc7831411ec8a3cbe20d78 ] This is the candidate patch of CVE-2023-47233 : https://nvd.nist.gov/vuln/detail/CVE-2023-47233 In brcm80211 driver,it starts with the following invoking chain to start init a timeout worker: ->brcmf_usb_probe ->brcmf_usb_probe_cb ->brcmf_attach ->brcmf_bus_started ->brcmf_cfg80211_attach ->wl_init_priv ->brcmf_init_escan ->INIT_WORK(&cfg->escan_timeout_work, brcmf_cfg80211_escan_timeout_worker); If we disconnect the USB by hotplug, it will call brcmf_usb_disconnect to make cleanup. The invoking chain is : brcmf_usb_disconnect ->brcmf_usb_disconnect_cb ->brcmf_detach ->brcmf_cfg80211_detach ->kfree(cfg); While the timeout woker may still be running. This will cause a use-after-free bug on cfg in brcmf_cfg80211_escan_timeout_worker. Fix it by deleting the timer and canceling the worker in brcmf_cfg80211_detach. Fixes: e756af5b30b0 ("brcmfmac: add e-scan support.") Signed-off-by: Zheng Wang Cc: stable@vger.kernel.org [arend.vanspriel@broadcom.com: keep timer delete as is and cancel work just before free] Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240107072504.392713-1-arend.vanspriel@broadcom.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index ad5a8d61d9385..24a3d5a593f15 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -791,8 +791,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, scan_request = cfg->scan_request; cfg->scan_request = NULL; - if (timer_pending(&cfg->escan_timeout)) - del_timer_sync(&cfg->escan_timeout); + timer_delete_sync(&cfg->escan_timeout); if (fw_abort) { /* Do a scan abort to stop the driver's scan engine */ @@ -7805,6 +7804,7 @@ void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg) brcmf_btcoex_detach(cfg); wiphy_unregister(cfg->wiphy); wl_deinit_priv(cfg); + cancel_work_sync(&cfg->escan_timeout_work); brcmf_free_wiphy(cfg->wiphy); kfree(cfg); } -- GitLab From e144e47703ce67f5f467cdc6c8c43157bcd8b4d1 Mon Sep 17 00:00:00 2001 From: Hidenori Kobayashi Date: Tue, 9 Jan 2024 17:09:09 +0900 Subject: [PATCH 1196/2290] media: staging: ipu3-imgu: Set fields before media_entity_pads_init() [ Upstream commit 87318b7092670d4086bfec115a0280a60c51c2dd ] The imgu driver fails to probe with the following message because it does not set the pad's flags before calling media_entity_pads_init(). [ 14.596315] ipu3-imgu 0000:00:05.0: failed initialize subdev media entity (-22) [ 14.596322] ipu3-imgu 0000:00:05.0: failed to register subdev0 ret (-22) [ 14.596327] ipu3-imgu 0000:00:05.0: failed to register pipes (-22) [ 14.596331] ipu3-imgu 0000:00:05.0: failed to create V4L2 devices (-22) Fix the initialization order so that the driver probe succeeds. The ops initialization is also moved together for readability. Fixes: a0ca1627b450 ("media: staging/intel-ipu3: Add v4l2 driver based on media framework") Cc: # 6.7 Cc: Dan Carpenter Signed-off-by: Hidenori Kobayashi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/ipu3/ipu3-v4l2.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index e530767e80a5d..55cc44a401bc4 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -1069,6 +1069,11 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, struct imgu_media_pipe *imgu_pipe = &imgu->imgu_pipe[pipe]; /* Initialize subdev media entity */ + imgu_sd->subdev.entity.ops = &imgu_media_ops; + for (i = 0; i < IMGU_NODE_NUM; i++) { + imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? + MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; + } r = media_entity_pads_init(&imgu_sd->subdev.entity, IMGU_NODE_NUM, imgu_sd->subdev_pads); if (r) { @@ -1076,11 +1081,6 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, "failed initialize subdev media entity (%d)\n", r); return r; } - imgu_sd->subdev.entity.ops = &imgu_media_ops; - for (i = 0; i < IMGU_NODE_NUM; i++) { - imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? - MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; - } /* Initialize subdev */ v4l2_subdev_init(&imgu_sd->subdev, &imgu_subdev_ops); @@ -1177,15 +1177,15 @@ static int imgu_v4l2_node_setup(struct imgu_device *imgu, unsigned int pipe, } /* Initialize media entities */ + node->vdev_pad.flags = node->output ? + MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; + vdev->entity.ops = NULL; r = media_entity_pads_init(&vdev->entity, 1, &node->vdev_pad); if (r) { dev_err(dev, "failed initialize media entity (%d)\n", r); mutex_destroy(&node->lock); return r; } - node->vdev_pad.flags = node->output ? - MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; - vdev->entity.ops = NULL; /* Initialize vbq */ vbq->type = node->vdev_fmt.type; -- GitLab From 5166fc45d7eafbbe47417c5effb47191c08f33ab Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Mon, 18 Dec 2023 19:32:36 +0530 Subject: [PATCH 1197/2290] arm64: dts: qcom: sc7280: Add additional MSI interrupts [ Upstream commit b8ba66b40da3230a8675cb5dd5c2dea5bce24d62 ] Current MSI's mapping doesn't have all the vectors. This platform supports 8 vectors each vector supports 32 MSI's, so total MSI's supported is 256. Add all the MSI groups supported for this PCIe instance in this platform. Fixes: 92e0ee9f83b3 ("arm64: dts: qcom: sc7280: Add PCIe and PHY related nodes") cc: stable@vger.kernel.org Signed-off-by: Krishna chaitanya chundru Link: https://lore.kernel.org/r/20231218-additional_msi-v1-1-de6917392684@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 04106d7254000..b5cd24d59ad9a 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2028,8 +2028,16 @@ ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - interrupts = ; - interrupt-names = "msi"; + interrupts = , + , + , + , + , + , + , + ; + interrupt-names = "msi0", "msi1", "msi2", "msi3", + "msi4", "msi5", "msi6", "msi7"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>, -- GitLab From 9875deeefedfbfba08d4f261a2237549b1f918a8 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Sun, 17 Dec 2023 13:36:59 +0800 Subject: [PATCH 1198/2290] remoteproc: virtio: Fix wdg cannot recovery remote processor [ Upstream commit b327c72753d6a78de37aed6c35756f2ef62897ee ] Recovery remote processor failed when wdg irq received: [ 0.842574] remoteproc remoteproc0: crash detected in cix-dsp-rproc: type watchdog [ 0.842750] remoteproc remoteproc0: handling crash #1 in cix-dsp-rproc [ 0.842824] remoteproc remoteproc0: recovering cix-dsp-rproc [ 0.843342] remoteproc remoteproc0: stopped remote processor cix-dsp-rproc [ 0.847901] rproc-virtio rproc-virtio.0.auto: Failed to associate buffer [ 0.847979] remoteproc remoteproc0: failed to probe subdevices for cix-dsp-rproc: -16 The reason is that dma coherent mem would not be released when recovering the remote processor, due to rproc_virtio_remove() would not be called, where the mem released. It will fail when it try to allocate and associate buffer again. Releasing reserved memory from rproc_virtio_dev_release(), instead of rproc_virtio_remove(). Fixes: 1d7b61c06dc3 ("remoteproc: virtio: Create platform device for the remoteproc_virtio") Signed-off-by: Joakim Zhang Acked-by: Arnaud Pouliquen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231217053659.3245745-1-joakim.zhang@cixtech.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/remoteproc_virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 0e95525c11581..ab5e4f02ab225 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -351,6 +351,9 @@ static void rproc_virtio_dev_release(struct device *dev) kfree(vdev); + of_reserved_mem_device_release(&rvdev->pdev->dev); + dma_release_coherent_memory(&rvdev->pdev->dev); + put_device(&rvdev->pdev->dev); } @@ -584,9 +587,6 @@ static int rproc_virtio_remove(struct platform_device *pdev) rproc_remove_subdev(rproc, &rvdev->subdev); rproc_remove_rvdev(rvdev); - of_reserved_mem_device_release(&pdev->dev); - dma_release_coherent_memory(&pdev->dev); - put_device(&rproc->dev); return 0; -- GitLab From a881dd09fd9920dec3dd5b3627837bd52206a4bf Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 23 Jan 2024 11:58:14 +0530 Subject: [PATCH 1199/2290] clk: qcom: gcc-sdm845: Add soft dependency on rpmhpd [ Upstream commit 1d9054e3a4fd36e2949e616f7360bdb81bcc1921 ] With the addition of RPMh power domain to the GCC node in device tree, we noticed a significant delay in getting the UFS driver probed on AOSP which futher led to mount failures because Android do not support rootwait. So adding a soft dependency on RPMh power domain which informs modprobe to load rpmhpd module before gcc-sdm845. Cc: stable@vger.kernel.org # v5.4+ Fixes: 4b6ea15c0a11 ("arm64: dts: qcom: sdm845: Add missing RPMh power domain to GCC") Suggested-by: Manivannan Sadhasivam Signed-off-by: Amit Pundir Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240123062814.2555649-1-amit.pundir@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-sdm845.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index 6af08e0ca8475..ef15e8f114027 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -4038,3 +4038,4 @@ module_exit(gcc_sdm845_exit); MODULE_DESCRIPTION("QTI GCC SDM845 Driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:gcc-sdm845"); +MODULE_SOFTDEP("pre: rpmhpd"); -- GitLab From d4b1c3b5ec1c14b5e89f4fd849cda8b5ab85f0fe Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 16 Nov 2023 10:01:21 +0100 Subject: [PATCH 1200/2290] smack: Set SMACK64TRANSMUTE only for dirs in smack_inode_setxattr() [ Upstream commit 9c82169208dde516510aaba6bbd8b13976690c5d ] Since the SMACK64TRANSMUTE xattr makes sense only for directories, enforce this restriction in smack_inode_setxattr(). Cc: stable@vger.kernel.org Fixes: 5c6d1125f8db ("Smack: Transmute labels on specified directories") # v2.6.38.x Signed-off-by: Roberto Sassu Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index fbadc61feedd1..07f7351148ecf 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1309,7 +1309,8 @@ static int smack_inode_setxattr(struct user_namespace *mnt_userns, check_star = 1; } else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) { check_priv = 1; - if (size != TRANS_TRUE_SIZE || + if (!S_ISDIR(d_backing_inode(dentry)->i_mode) || + size != TRANS_TRUE_SIZE || strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) rc = -EINVAL; } else -- GitLab From dee9c25865550782e831cb8d6e80dd29817078ac Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 16 Nov 2023 10:01:22 +0100 Subject: [PATCH 1201/2290] smack: Handle SMACK64TRANSMUTE in smack_inode_setsecurity() [ Upstream commit ac02f007d64eb2769d0bde742aac4d7a5fc6e8a5 ] If the SMACK64TRANSMUTE xattr is provided, and the inode is a directory, update the in-memory inode flags by setting SMK_INODE_TRANSMUTE. Cc: stable@vger.kernel.org Fixes: 5c6d1125f8db ("Smack: Transmute labels on specified directories") # v2.6.38.x Signed-off-by: Roberto Sassu Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 07f7351148ecf..feba69549d086 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2783,6 +2783,15 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, if (value == NULL || size > SMK_LONGLABEL || size == 0) return -EINVAL; + if (strcmp(name, XATTR_SMACK_TRANSMUTE) == 0) { + if (!S_ISDIR(inode->i_mode) || size != TRANS_TRUE_SIZE || + strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) + return -EINVAL; + + nsp->smk_flags |= SMK_INODE_TRANSMUTE; + return 0; + } + skp = smk_import_entry(value, size); if (IS_ERR(skp)) return PTR_ERR(skp); -- GitLab From 74cd997ba17bdc4a24ecaff608304ea93db0b9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Thu, 25 Jan 2024 19:39:32 +0100 Subject: [PATCH 1202/2290] arm: dts: marvell: Fix maxium->maxim typo in brownstone dts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 831e0cd4f9ee15a4f02ae10b67e7fdc10eb2b4fc ] Fix an obvious spelling error in the PMIC compatible in the MMP2 Brownstone DTS file. Fixes: 58f1193e6210 ("mfd: max8925: Add dts") Cc: Signed-off-by: Duje Mihanović Reported-by: Krzysztof Kozlowski Closes: https://lore.kernel.org/linux-devicetree/1410884282-18041-1-git-send-email-k.kozlowski@samsung.com/ Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240125-brownstone-typo-fix-v2-1-45bc48a0c81c@skole.hr [krzysztof: Just 10 years to take a patch, not bad! Rephrased commit msg] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/mmp2-brownstone.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts index 04f1ae1382e7a..bc64348b82185 100644 --- a/arch/arm/boot/dts/mmp2-brownstone.dts +++ b/arch/arm/boot/dts/mmp2-brownstone.dts @@ -28,7 +28,7 @@ &twsi1 { status = "okay"; pmic: max8925@3c { - compatible = "maxium,max8925"; + compatible = "maxim,max8925"; reg = <0x3c>; interrupts = <1>; interrupt-parent = <&intcmux4>; -- GitLab From 07c3fe923ff7eccf684fb4f8c953d0a7cc8ded73 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 10 Jan 2024 15:03:05 -0500 Subject: [PATCH 1203/2290] drm/vmwgfx: Fix possible null pointer derefence with invalid contexts [ Upstream commit 517621b7060096e48e42f545fa6646fc00252eac ] vmw_context_cotable can return either an error or a null pointer and its usage sometimes went unchecked. Subsequent code would then try to access either a null pointer or an error value. The invalid dereferences were only possible with malformed userspace apps which never properly initialized the rendering contexts. Check the results of vmw_context_cotable to fix the invalid derefs. Thanks: ziming zhang(@ezrak1e) from Ant Group Light-Year Security Lab who was the first person to discover it. Niels De Graef who reported it and helped to track down the poc. Fixes: 9c079b8ce8bf ("drm/vmwgfx: Adapt execbuf to the new validation api") Cc: # v4.20+ Reported-by: Niels De Graef Signed-off-by: Zack Rusin Cc: Martin Krastev Cc: Maaz Mombasawala Cc: Ian Forbes Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Reviewed-by: Maaz Mombasawala Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240110200305.94086-1-zack.rusin@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index bc7f02e4ecebb..2f7ac91149fc0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -447,7 +447,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, vmw_res_type(ctx) == vmw_res_dx_context) { for (i = 0; i < cotable_max; ++i) { res = vmw_context_cotable(ctx, i); - if (IS_ERR(res)) + if (IS_ERR_OR_NULL(res)) continue; ret = vmw_execbuf_res_val_add(sw_context, res, @@ -1259,6 +1259,8 @@ static int vmw_cmd_dx_define_query(struct vmw_private *dev_priv, return -EINVAL; cotable_res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXQUERY); + if (IS_ERR_OR_NULL(cotable_res)) + return cotable_res ? PTR_ERR(cotable_res) : -EINVAL; ret = vmw_cotable_notify(cotable_res, cmd->body.queryId); return ret; @@ -2477,6 +2479,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, return ret; res = vmw_context_cotable(ctx_node->ctx, vmw_view_cotables[view_type]); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->defined_id); if (unlikely(ret != 0)) return ret; @@ -2562,8 +2566,8 @@ static int vmw_cmd_dx_so_define(struct vmw_private *dev_priv, so_type = vmw_so_cmd_to_type(header->id); res = vmw_context_cotable(ctx_node->ctx, vmw_so_cotables[so_type]); - if (IS_ERR(res)) - return PTR_ERR(res); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cotable_notify(res, cmd->defined_id); @@ -2682,6 +2686,8 @@ static int vmw_cmd_dx_define_shader(struct vmw_private *dev_priv, return -EINVAL; res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXSHADER); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.shaderId); if (ret) return ret; @@ -3003,6 +3009,8 @@ static int vmw_cmd_dx_define_streamoutput(struct vmw_private *dev_priv, } res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_STREAMOUTPUT); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.soid); if (ret) return ret; -- GitLab From 12609c76b755dbeb1645c0aacc0f0f4743b2eff3 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 18 Jan 2024 10:21:57 -0500 Subject: [PATCH 1204/2290] serial: max310x: fix NULL pointer dereference in I2C instantiation [ Upstream commit 0d27056c24efd3d63a03f3edfbcfc4827086b110 ] When trying to instantiate a max14830 device from userspace: echo max14830 0x60 > /sys/bus/i2c/devices/i2c-2/new_device we get the following error: Unable to handle kernel NULL pointer dereference at virtual address... ... Call trace: max310x_i2c_probe+0x48/0x170 [max310x] i2c_device_probe+0x150/0x2a0 ... Add check for validity of devtype to prevent the error, and abort probe with a meaningful error message. Fixes: 2e1f2d9a9bdb ("serial: max310x: implement I2C support") Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240118152213.2644269-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 444f89eb2d4b7..d409ef3887212 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1633,13 +1633,16 @@ static unsigned short max310x_i2c_slave_addr(unsigned short addr, static int max310x_i2c_probe(struct i2c_client *client) { - const struct max310x_devtype *devtype = - device_get_match_data(&client->dev); + const struct max310x_devtype *devtype; struct i2c_client *port_client; struct regmap *regmaps[4]; unsigned int i; u8 port_addr; + devtype = device_get_match_data(&client->dev); + if (!devtype) + return dev_err_probe(&client->dev, -ENODEV, "Failed to match device\n"); + if (client->addr < devtype->slave_addr.min || client->addr > devtype->slave_addr.max) return dev_err_probe(&client->dev, -EINVAL, -- GitLab From 6d21d0356aa44157a62e39c0d1a13d4c69a8d0c8 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 31 Jan 2024 10:00:20 +0100 Subject: [PATCH 1205/2290] pci_iounmap(): Fix MMIO mapping leak [ Upstream commit 7626913652cc786c238e2dd7d8740b17d41b2637 ] The #ifdef ARCH_HAS_GENERIC_IOPORT_MAP accidentally also guards iounmap(), which means MMIO mappings are leaked. Move the guard so we call iounmap() for MMIO mappings. Fixes: 316e8d79a095 ("pci_iounmap'2: Electric Boogaloo: try to make sense of it all") Link: https://lore.kernel.org/r/20240131090023.12331-2-pstanner@redhat.com Reported-by: Danilo Krummrich Suggested-by: Arnd Bergmann Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Arnd Bergmann Cc: # v5.15+ Signed-off-by: Sasha Levin --- lib/pci_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index ce39ce9f3526e..2829ddb0e316b 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -170,8 +170,8 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *p) if (addr >= start && addr < start + IO_SPACE_LIMIT) return; - iounmap(p); #endif + iounmap(p); } EXPORT_SYMBOL(pci_iounmap); -- GitLab From 09c1be4d581d3356159abcc5a8a7a6c5f1bf1e77 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 22 Dec 2023 13:50:30 +0800 Subject: [PATCH 1206/2290] media: xc4000: Fix atomicity violation in xc4000_get_frequency [ Upstream commit 36d503ad547d1c75758a6fcdbec2806f1b6aeb41 ] In xc4000_get_frequency(): *freq = priv->freq_hz + priv->freq_offset; The code accesses priv->freq_hz and priv->freq_offset without holding any lock. In xc4000_set_params(): // Code that updates priv->freq_hz and priv->freq_offset ... xc4000_get_frequency() and xc4000_set_params() may execute concurrently, risking inconsistent reads of priv->freq_hz and priv->freq_offset. Since these related data may update during reading, it can result in incorrect frequency calculation, leading to atomicity violations. This possible bug is found by an experimental static analysis tool developed by our team, BassCheck[1]. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 6.2. To address this issue, it is proposed to add a mutex lock pair in xc4000_get_frequency() to ensure atomicity. With this patch applied, our tool no longer reports the possible bug, with the kernel configuration allyesconfig for x86_64. Due to the lack of associated hardware, we cannot test the patch in runtime testing, and just verify it according to the code logic. [1] https://sites.google.com/view/basscheck/ Fixes: 4c07e32884ab ("[media] xc4000: Fix get_frequency()") Cc: stable@vger.kernel.org Reported-by: BassCheck Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/tuners/xc4000.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index 57ded9ff3f043..29bc63021c5aa 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -1515,10 +1515,10 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc4000_priv *priv = fe->tuner_priv; + mutex_lock(&priv->lock); *freq = priv->freq_hz + priv->freq_offset; if (debug) { - mutex_lock(&priv->lock); if ((priv->cur_fw.type & (BASE | FM | DTV6 | DTV7 | DTV78 | DTV8)) == BASE) { u16 snr = 0; @@ -1529,8 +1529,8 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) return 0; } } - mutex_unlock(&priv->lock); } + mutex_unlock(&priv->lock); dprintk(1, "%s()\n", __func__); -- GitLab From 1025ff4e6a7abaa1070414f0b05b0b13df1a194d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Jan 2024 15:55:40 +0200 Subject: [PATCH 1207/2290] media: mc: Add local pad to pipeline regardless of the link state [ Upstream commit 78f0daa026d4c5e192d31801d1be6caf88250220 ] When building pipelines by following links, the media_pipeline_explore_next_link() function only traverses enabled links. The remote pad of a disabled link is not added to the pipeline, and neither is the local pad. While the former is correct as disabled links should not be followed, not adding the local pad breaks processing of the MEDIA_PAD_FL_MUST_CONNECT flag. The MEDIA_PAD_FL_MUST_CONNECT flag is checked in the __media_pipeline_start() function that iterates over all pads after populating the pipeline. If the pad is not present, the check gets skipped, rendering it useless. Fix this by adding the local pad of all links regardless of their state, only skipping the remote pad for disabled links. Cc: stable@vger.kernel.org # 6.1 Fixes: ae219872834a ("media: mc: entity: Rewrite media_pipeline_start()") Reported-by: Frieder Schrempf Closes: https://lore.kernel.org/linux-media/7658a15a-80c5-219f-2477-2a94ba6c6ba1@kontron.de Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index f268cf66053e1..20a2630455f2c 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -594,13 +594,6 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); - /* Skip links that are not enabled. */ - if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { - dev_dbg(walk->mdev->dev, - "media pipeline: skipping link (disabled)\n"); - return 0; - } - /* Get the local pad and remote pad. */ if (link->source->entity == pad->entity) { local = link->source; @@ -622,13 +615,20 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, } /* - * Add the local and remote pads of the link to the pipeline and push - * them to the stack, if they're not already present. + * Add the local pad of the link to the pipeline and push it to the + * stack, if not already present. */ ret = media_pipeline_add_pad(pipe, walk, local); if (ret) return ret; + /* Similarly, add the remote pad, but only if the link is enabled. */ + if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { + dev_dbg(walk->mdev->dev, + "media pipeline: skipping link (disabled)\n"); + return 0; + } + ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; -- GitLab From f29793c33984f4d41448ba55da7432b153dabd89 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 00:24:12 +0200 Subject: [PATCH 1208/2290] media: mc: Fix flags handling when creating pad links [ Upstream commit 422f7af75d03d50895938d38bc9cb8be759c440f ] The media_create_pad_link() function doesn't correctly clear reject link type flags, nor does it set the DATA_LINK flag. It only works because the MEDIA_LNK_FL_DATA_LINK flag's value is 0. Fix it by returning an error if any link type flag is set. This doesn't introduce any regression, as nobody calls the media_create_pad_link() function with link type flags (easily checked by grepping for the flag in the source code, there are very few hits). Set the MEDIA_LNK_FL_DATA_LINK explicitly, which is a no-op that the compiler will optimize out, but is still useful to make the code more explicit and easier to understand. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 20a2630455f2c..688780c8734d4 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -1017,6 +1017,11 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_link *link; struct media_link *backlink; + if (flags & MEDIA_LNK_FL_LINK_TYPE) + return -EINVAL; + + flags |= MEDIA_LNK_FL_DATA_LINK; + if (WARN_ON(!source || !sink) || WARN_ON(source_pad >= source->num_pads) || WARN_ON(sink_pad >= sink->num_pads)) @@ -1032,7 +1037,7 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, link->source = &source->pads[source_pad]; link->sink = &sink->pads[sink_pad]; - link->flags = flags & ~MEDIA_LNK_FL_INTERFACE_LINK; + link->flags = flags; /* Initialize graph object embedded at the new link */ media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK, -- GitLab From cff51913c5cc42b45ea010e91eb0c1e349d4292f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 00:30:02 +0200 Subject: [PATCH 1209/2290] media: mc: Add num_links flag to media_pad [ Upstream commit baeddf94aa61879b118f2faa37ed126d772670cc ] Maintain a counter of the links connected to a pad in the media_pad structure. This helps checking if a pad is connected to anything, which will be used in the pipeline building code. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 6 ++++++ include/media/media-entity.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 688780c8734d4..c7cb49205b017 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -957,6 +957,9 @@ static void __media_entity_remove_link(struct media_entity *entity, /* Remove the reverse links for a data link. */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) { + link->source->num_links--; + link->sink->num_links--; + if (link->source->entity == entity) remote = link->sink->entity; else @@ -1068,6 +1071,9 @@ media_create_pad_link(struct media_entity *source, u16 source_pad, sink->num_links++; source->num_links++; + link->source->num_links++; + link->sink->num_links++; + return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 28c9de8a1f348..03bb0963942bd 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -205,6 +205,7 @@ enum media_pad_signal_type { * @graph_obj: Embedded structure containing the media object common data * @entity: Entity this pad belongs to * @index: Pad index in the entity pads array, numbered from 0 to n + * @num_links: Number of links connected to this pad * @sig_type: Type of the signal inside a media pad * @flags: Pad flags, as defined in * :ref:`include/uapi/linux/media.h ` @@ -216,6 +217,7 @@ struct media_pad { struct media_gobj graph_obj; /* must be first field in struct */ struct media_entity *entity; u16 index; + u16 num_links; enum media_pad_signal_type sig_type; unsigned long flags; -- GitLab From 61656ca92ea9391ed1ea9da0fc38205a8e22c230 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 00:30:02 +0200 Subject: [PATCH 1210/2290] media: mc: Rename pad variable to clarify intent [ Upstream commit 9ec9109cf9f611e3ec9ed0355afcc7aae5e73176 ] The pad local variable in the media_pipeline_explore_next_link() function is used to store the pad through which the entity has been reached. Rename it to origin to reflect that and make the code easier to read. This will be even more important in subsequent commits when expanding the function with additional logic. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/mc/mc-entity.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index c7cb49205b017..50b68b4dde5d0 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -579,13 +579,13 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry = media_pipeline_walk_top(walk); - struct media_pad *pad; + struct media_pad *origin; struct media_link *link; struct media_pad *local; struct media_pad *remote; int ret; - pad = entry->pad; + origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); media_pipeline_walk_pop(walk); @@ -595,7 +595,7 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, link->sink->entity->name, link->sink->index); /* Get the local pad and remote pad. */ - if (link->source->entity == pad->entity) { + if (link->source->entity == origin->entity) { local = link->source; remote = link->sink; } else { @@ -607,8 +607,9 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, * Skip links that originate from a different pad than the incoming pad * that is not connected internally in the entity to the incoming pad. */ - if (pad != local && - !media_entity_has_pad_interdep(pad->entity, pad->index, local->index)) { + if (origin != local && + !media_entity_has_pad_interdep(origin->entity, origin->index, + local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); return 0; -- GitLab From e2c545b841a7ae2c1796c9968fcf47166075cfb1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Jan 2024 01:04:52 +0200 Subject: [PATCH 1211/2290] media: mc: Expand MUST_CONNECT flag to always require an enabled link [ Upstream commit b3decc5ce7d778224d266423b542326ad469cb5f ] The MEDIA_PAD_FL_MUST_CONNECT flag indicates that the pad requires an enabled link to stream, but only if it has any link at all. This makes little sense, as if a pad is part of a pipeline, there are very few use cases for an active link to be mandatory only if links exist at all. A review of in-tree drivers confirms they all need an enabled link for pads marked with the MEDIA_PAD_FL_MUST_CONNECT flag. Expand the scope of the flag by rejecting pads that have no links at all. This requires modifying the pipeline build code to add those pads to the pipeline. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- .../media/mediactl/media-types.rst | 11 ++-- drivers/media/mc/mc-entity.c | 53 +++++++++++++++---- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/Documentation/userspace-api/media/mediactl/media-types.rst b/Documentation/userspace-api/media/mediactl/media-types.rst index 0ffeece1e0c8e..6332e8395263b 100644 --- a/Documentation/userspace-api/media/mediactl/media-types.rst +++ b/Documentation/userspace-api/media/mediactl/media-types.rst @@ -375,12 +375,11 @@ Types and flags used to represent the media graph elements are origins of links. * - ``MEDIA_PAD_FL_MUST_CONNECT`` - - If this flag is set and the pad is linked to any other pad, then - at least one of those links must be enabled for the entity to be - able to stream. There could be temporary reasons (e.g. device - configuration dependent) for the pad to need enabled links even - when this flag isn't set; the absence of the flag doesn't imply - there is none. + - If this flag is set, then for this pad to be able to stream, it must + be connected by at least one enabled link. There could be temporary + reasons (e.g. device configuration dependent) for the pad to need + enabled links even when this flag isn't set; the absence of the flag + doesn't imply there is none. One and only one of ``MEDIA_PAD_FL_SINK`` and ``MEDIA_PAD_FL_SOURCE`` diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 50b68b4dde5d0..8919df09e3e8d 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -509,14 +509,15 @@ static int media_pipeline_walk_push(struct media_pipeline_walk *walk, /* * Move the top entry link cursor to the next link. If all links of the entry - * have been visited, pop the entry itself. + * have been visited, pop the entry itself. Return true if the entry has been + * popped. */ -static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) +static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry; if (WARN_ON(walk->stack.top < 0)) - return; + return false; entry = media_pipeline_walk_top(walk); @@ -526,7 +527,7 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) walk->stack.top); walk->stack.top--; - return; + return true; } entry->links = entry->links->next; @@ -534,6 +535,8 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) dev_dbg(walk->mdev->dev, "media pipeline: moved entry %u to next link\n", walk->stack.top); + + return false; } /* Free all memory allocated while walking the pipeline. */ @@ -583,11 +586,12 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_link *link; struct media_pad *local; struct media_pad *remote; + bool last_link; int ret; origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); - media_pipeline_walk_pop(walk); + last_link = media_pipeline_walk_pop(walk); dev_dbg(walk->mdev->dev, "media pipeline: exploring link '%s':%u -> '%s':%u\n", @@ -612,7 +616,7 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); - return 0; + goto done; } /* @@ -627,13 +631,44 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (disabled)\n"); - return 0; + goto done; } ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; +done: + /* + * If we're done iterating over links, iterate over pads of the entity. + * This is necessary to discover pads that are not connected with any + * link. Those are dead ends from a pipeline exploration point of view, + * but are still part of the pipeline and need to be added to enable + * proper validation. + */ + if (!last_link) + return 0; + + dev_dbg(walk->mdev->dev, + "media pipeline: adding unconnected pads of '%s'\n", + local->entity->name); + + media_entity_for_each_pad(origin->entity, local) { + /* + * Skip the origin pad (already handled), pad that have links + * (already discovered through iterating over links) and pads + * not internally connected. + */ + if (origin == local || !local->num_links || + !media_entity_has_pad_interdep(origin->entity, origin->index, + local->index)) + continue; + + ret = media_pipeline_add_pad(pipe, walk, local); + if (ret) + return ret; + } + return 0; } @@ -745,7 +780,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pad *pad = ppad->pad; struct media_entity *entity = pad->entity; bool has_enabled_link = false; - bool has_link = false; struct media_link *link; dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name, @@ -775,7 +809,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad, /* Record if the pad has links and enabled links. */ if (link->flags & MEDIA_LNK_FL_ENABLED) has_enabled_link = true; - has_link = true; /* * Validate the link if it's enabled and has the @@ -813,7 +846,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad, * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set, * ensure that it has either no link or an enabled link. */ - if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && has_link && + if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && !has_enabled_link) { dev_dbg(mdev->dev, "Pad '%s':%u must be connected by an enabled link\n", -- GitLab From b54478d20375874aeee257744dedfd3e413432ff Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Jan 2024 17:15:30 -0800 Subject: [PATCH 1212/2290] KVM: Always flush async #PF workqueue when vCPU is being destroyed [ Upstream commit 3d75b8aa5c29058a512db29da7cbee8052724157 ] Always flush the per-vCPU async #PF workqueue when a vCPU is clearing its completion queue, e.g. when a VM and all its vCPUs is being destroyed. KVM must ensure that none of its workqueue callbacks is running when the last reference to the KVM _module_ is put. Gifting a reference to the associated VM prevents the workqueue callback from dereferencing freed vCPU/VM memory, but does not prevent the KVM module from being unloaded before the callback completes. Drop the misguided VM refcount gifting, as calling kvm_put_kvm() from async_pf_execute() if kvm_put_kvm() flushes the async #PF workqueue will result in deadlock. async_pf_execute() can't return until kvm_put_kvm() finishes, and kvm_put_kvm() can't return until async_pf_execute() finishes: WARNING: CPU: 8 PID: 251 at virt/kvm/kvm_main.c:1435 kvm_put_kvm+0x2d/0x320 [kvm] Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel kvm irqbypass CPU: 8 PID: 251 Comm: kworker/8:1 Tainted: G W 6.6.0-rc1-e7af8d17224a-x86/gmem-vm #119 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Workqueue: events async_pf_execute [kvm] RIP: 0010:kvm_put_kvm+0x2d/0x320 [kvm] Call Trace: async_pf_execute+0x198/0x260 [kvm] process_one_work+0x145/0x2d0 worker_thread+0x27e/0x3a0 kthread+0xba/0xe0 ret_from_fork+0x2d/0x50 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- INFO: task kworker/8:1:251 blocked for more than 120 seconds. Tainted: G W 6.6.0-rc1-e7af8d17224a-x86/gmem-vm #119 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/8:1 state:D stack:0 pid:251 ppid:2 flags:0x00004000 Workqueue: events async_pf_execute [kvm] Call Trace: __schedule+0x33f/0xa40 schedule+0x53/0xc0 schedule_timeout+0x12a/0x140 __wait_for_common+0x8d/0x1d0 __flush_work.isra.0+0x19f/0x2c0 kvm_clear_async_pf_completion_queue+0x129/0x190 [kvm] kvm_arch_destroy_vm+0x78/0x1b0 [kvm] kvm_put_kvm+0x1c1/0x320 [kvm] async_pf_execute+0x198/0x260 [kvm] process_one_work+0x145/0x2d0 worker_thread+0x27e/0x3a0 kthread+0xba/0xe0 ret_from_fork+0x2d/0x50 ret_from_fork_asm+0x11/0x20 If kvm_clear_async_pf_completion_queue() actually flushes the workqueue, then there's no need to gift async_pf_execute() a reference because all invocations of async_pf_execute() will be forced to complete before the vCPU and its VM are destroyed/freed. And that in turn fixes the module unloading bug as __fput() won't do module_put() on the last vCPU reference until the vCPU has been freed, e.g. if closing the vCPU file also puts the last reference to the KVM module. Note that kvm_check_async_pf_completion() may also take the work item off the completion queue and so also needs to flush the work queue, as the work will not be seen by kvm_clear_async_pf_completion_queue(). Waiting on the workqueue could theoretically delay a vCPU due to waiting for the work to complete, but that's a very, very small chance, and likely a very small delay. kvm_arch_async_page_present_queued() unconditionally makes a new request, i.e. will effectively delay entering the guest, so the remaining work is really just: trace_kvm_async_pf_completed(addr, cr2_or_gpa); __kvm_vcpu_wake_up(vcpu); mmput(mm); and mmput() can't drop the last reference to the page tables if the vCPU is still alive, i.e. the vCPU won't get stuck tearing down page tables. Add a helper to do the flushing, specifically to deal with "wakeup all" work items, as they aren't actually work items, i.e. are never placed in a workqueue. Trying to flush a bogus workqueue entry rightly makes __flush_work() complain (kudos to whoever added that sanity check). Note, commit 5f6de5cbebee ("KVM: Prevent module exit until all VMs are freed") *tried* to fix the module refcounting issue by having VMs grab a reference to the module, but that only made the bug slightly harder to hit as it gave async_pf_execute() a bit more time to complete before the KVM module could be unloaded. Fixes: af585b921e5d ("KVM: Halt vcpu if page it tries to access is swapped out") Cc: stable@vger.kernel.org Cc: David Matlack Reviewed-by: Xu Yilun Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240110011533.503302-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- virt/kvm/async_pf.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 9bfe1d6f6529a..adaf6f141804f 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -88,7 +88,27 @@ static void async_pf_execute(struct work_struct *work) __kvm_vcpu_wake_up(vcpu); mmput(mm); - kvm_put_kvm(vcpu->kvm); +} + +static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work) +{ + /* + * The async #PF is "done", but KVM must wait for the work item itself, + * i.e. async_pf_execute(), to run to completion. If KVM is a module, + * KVM must ensure *no* code owned by the KVM (the module) can be run + * after the last call to module_put(). Note, flushing the work item + * is always required when the item is taken off the completion queue. + * E.g. even if the vCPU handles the item in the "normal" path, the VM + * could be terminated before async_pf_execute() completes. + * + * Wake all events skip the queue and go straight done, i.e. don't + * need to be flushed (but sanity check that the work wasn't queued). + */ + if (work->wakeup_all) + WARN_ON_ONCE(work->work.func); + else + flush_work(&work->work); + kmem_cache_free(async_pf_cache, work); } void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) @@ -115,7 +135,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) #else if (cancel_work_sync(&work->work)) { mmput(work->mm); - kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } #endif @@ -127,7 +146,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_first_entry(&vcpu->async_pf.done, typeof(*work), link); list_del(&work->link); - kmem_cache_free(async_pf_cache, work); + + spin_unlock(&vcpu->async_pf.lock); + kvm_flush_and_free_async_pf_work(work); + spin_lock(&vcpu->async_pf.lock); } spin_unlock(&vcpu->async_pf.lock); @@ -152,7 +174,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->queue); vcpu->async_pf.queued--; - kmem_cache_free(async_pf_cache, work); + kvm_flush_and_free_async_pf_work(work); } } @@ -187,7 +209,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, work->arch = *arch; work->mm = current->mm; mmget(work->mm); - kvm_get_kvm(work->vcpu->kvm); INIT_WORK(&work->work, async_pf_execute); -- GitLab From 14cdbd9440a4c23f1347d7e97877953c9a5ab20c Mon Sep 17 00:00:00 2001 From: Tor Vic Date: Fri, 9 Feb 2024 16:42:26 +0100 Subject: [PATCH 1213/2290] cpufreq: amd-pstate: Fix min_perf assignment in amd_pstate_adjust_perf() [ Upstream commit b26ffbf800ae3c8d01bdf90d9cd8a37e1606ff06 ] In the function amd_pstate_adjust_perf(), the 'min_perf' variable is set to 'highest_perf' instead of 'lowest_perf'. Fixes: 1d215f0319c2 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State") Reported-by: Oleksandr Natalenko Reviewed-by: Perry Yuan Signed-off-by: Tor Vic Reviewed-by: Mario Limonciello Cc: 6.1+ # 6.1+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index edc294ee5a5bc..90dcf26f09731 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -320,7 +320,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); -- GitLab From 334fb14389b500a6dbf4e70ab2c3dabdddaa1d70 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Feb 2024 00:14:04 +1100 Subject: [PATCH 1214/2290] powerpc/smp: Adjust nr_cpu_ids to cover all threads of a core [ Upstream commit 5580e96dad5a439d561d9648ffcbccb739c2a120 ] If nr_cpu_ids is too low to include at least all the threads of a single core adjust nr_cpu_ids upwards. This avoids triggering odd bugs in code that assumes all threads of a core are available. Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://msgid.link/20231229120107.2281153-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/kernel/prom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8537c354c560b..a64f4fb332893 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -369,6 +369,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (IS_ENABLED(CONFIG_PPC64)) boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); + if (nr_cpu_ids % nthreads != 0) { + set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); + pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n", + nr_cpu_ids); + } + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: -- GitLab From 605ddb3a6e5f2ba5c55b73e62dc2e6f2c127eee4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Feb 2024 00:14:04 +1100 Subject: [PATCH 1215/2290] powerpc/smp: Increase nr_cpu_ids to include the boot CPU [ Upstream commit 777f81f0a9c780a6443bcf2c7785f0cc2e87c1ef ] If nr_cpu_ids is too low to include the boot CPU adjust nr_cpu_ids upward. Otherwise the kernel will BUG when trying to allocate a paca for the boot CPU and fail to boot. Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://msgid.link/20231229120107.2281153-2-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/kernel/prom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a64f4fb332893..9531ab90feb8a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -375,6 +375,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, nr_cpu_ids); } + if (boot_cpuid >= nr_cpu_ids) { + set_nr_cpu_ids(min(CONFIG_NR_CPUS, ALIGN(boot_cpuid + 1, nthreads))); + pr_warn("Boot CPU %d >= nr_cpu_ids, adjusted nr_cpu_ids to %d\n", + boot_cpuid, nr_cpu_ids); + } + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: -- GitLab From 0993f7f85d2650fdb2d91dfd6ffd7af991219be8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 10 Feb 2024 21:28:02 -0800 Subject: [PATCH 1216/2290] sparc64: NMI watchdog: fix return value of __setup handler [ Upstream commit 3ed7c61e49d65dacb96db798c0ab6fcd55a1f20f ] __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from setup_nmi_watchdog(). Fixes: e5553a6d0442 ("sparc64: Implement NMI watchdog on capable cpus.") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Cc: Sam Ravnborg Cc: Andrew Morton Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240211052802.22612-1-rdunlap@infradead.org Signed-off-by: Sasha Levin --- arch/sparc/kernel/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305c..fbf25e926f67c 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -274,7 +274,7 @@ static int __init setup_nmi_watchdog(char *str) if (!strncmp(str, "panic", 5)) panic_on_timeout = 1; - return 0; + return 1; } __setup("nmi_watchdog=", setup_nmi_watchdog); -- GitLab From 8557bef80cc3a3ac3aa2a53118294a024e277852 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 10 Feb 2024 21:28:08 -0800 Subject: [PATCH 1217/2290] sparc: vDSO: fix return value of __setup handler [ Upstream commit 5378f00c935bebb846b1fdb0e79cb76c137c56b5 ] __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from vdso_setup(). Fixes: 9a08862a5d2e ("vDSO for sparc") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Cc: Dan Carpenter Cc: Nick Alcock Cc: Sam Ravnborg Cc: Andrew Morton Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240211052808.22635-1-rdunlap@infradead.org Signed-off-by: Sasha Levin --- arch/sparc/vdso/vma.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index ae9a86cb6f3d9..2b97df0850aa7 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -449,9 +449,8 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); - if (err) - return err; - vdso_enabled = val; - return 0; + if (!err) + vdso_enabled = val; + return 1; } __setup("vdso=", vdso_setup); -- GitLab From efffffde33d64bbed0ccf35224b2e6c4a0923823 Mon Sep 17 00:00:00 2001 From: Svyatoslav Pankratov Date: Mon, 9 Oct 2023 13:27:19 +0100 Subject: [PATCH 1218/2290] crypto: qat - fix double free during reset [ Upstream commit 01aed663e6c421aeafc9c330bda630976b50a764 ] There is no need to free the reset_data structure if the recovery is unsuccessful and the reset is synchronous. The function adf_dev_aer_schedule_reset() handles the cleanup properly. Only asynchronous resets require such structure to be freed inside the reset worker. Fixes: d8cba25d2c68 ("crypto: qat - Intel(R) QAT driver framework") Signed-off-by: Svyatoslav Pankratov Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Stable-dep-of: 7d42e097607c ("crypto: qat - resolve race condition during AER recovery") Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/adf_aer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fe9bb2f3536a9..fa6b7ecd4c08d 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -95,7 +95,8 @@ static void adf_device_reset_worker(struct work_struct *work) if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - kfree(reset_data); + if (reset_data->mode == ADF_DEV_RESET_ASYNC) + kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } -- GitLab From 226fc408c5fcd23cc4186f05ea3a09a7a9aef2f7 Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Fri, 9 Feb 2024 13:43:42 +0100 Subject: [PATCH 1219/2290] crypto: qat - resolve race condition during AER recovery [ Upstream commit 7d42e097607c4d246d99225bf2b195b6167a210c ] During the PCI AER system's error recovery process, the kernel driver may encounter a race condition with freeing the reset_data structure's memory. If the device restart will take more than 10 seconds the function scheduling that restart will exit due to a timeout, and the reset_data structure will be freed. However, this data structure is used for completion notification after the restart is completed, which leads to a UAF bug. This results in a KFENCE bug notice. BUG: KFENCE: use-after-free read in adf_device_reset_worker+0x38/0xa0 [intel_qat] Use-after-free read at 0x00000000bc56fddf (in kfence-#142): adf_device_reset_worker+0x38/0xa0 [intel_qat] process_one_work+0x173/0x340 To resolve this race condition, the memory associated to the container of the work_struct is freed on the worker if the timeout expired, otherwise on the function that schedules the worker. The timeout detection can be done by checking if the caller is still waiting for completion or not by using completion_done() function. Fixes: d8cba25d2c68 ("crypto: qat - Intel(R) QAT driver framework") Cc: Signed-off-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/adf_aer.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fa6b7ecd4c08d..4f36b5a9164a7 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -95,7 +95,8 @@ static void adf_device_reset_worker(struct work_struct *work) if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - if (reset_data->mode == ADF_DEV_RESET_ASYNC) + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; @@ -103,11 +104,19 @@ static void adf_device_reset_worker(struct work_struct *work) adf_dev_restarted_notify(accel_dev); clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); - /* The dev is back alive. Notify the caller if in sync mode */ - if (reset_data->mode == ADF_DEV_RESET_SYNC) - complete(&reset_data->compl); - else + /* + * The dev is back alive. Notify the caller if in sync mode + * + * If device restart will take a more time than expected, + * the schedule_reset() function can timeout and exit. This can be + * detected by calling the completion_done() function. In this case + * the reset_data structure needs to be freed here. + */ + if (reset_data->mode == ADF_DEV_RESET_ASYNC || + completion_done(&reset_data->compl)) kfree(reset_data); + else + complete(&reset_data->compl); } static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, @@ -140,8 +149,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, dev_err(&GET_DEV(accel_dev), "Reset device timeout expired\n"); ret = -EFAULT; + } else { + kfree(reset_data); } - kfree(reset_data); return ret; } return 0; -- GitLab From 81479bf75809808c69bb4beda4f3010700f9a16d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 19 Feb 2024 16:08:02 -0800 Subject: [PATCH 1220/2290] selftests/mqueue: Set timeout to 180 seconds [ Upstream commit 85506aca2eb4ea41223c91c5fe25125953c19b13 ] While mq_perf_tests runs with the default kselftest timeout limit, which is 45 seconds, the test takes about 60 seconds to complete on i3.metal AWS instances. Hence, the test always times out. Increase the timeout to 180 seconds. Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Cc: # 5.4.x Signed-off-by: SeongJae Park Reviewed-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/mqueue/setting | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/mqueue/setting diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting new file mode 100644 index 0000000000000..a953c96aa16e1 --- /dev/null +++ b/tools/testing/selftests/mqueue/setting @@ -0,0 +1 @@ +timeout=180 -- GitLab From 719d7f899df48d662af6c2d36b253934fe9c5040 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 1 Feb 2024 22:18:45 +0800 Subject: [PATCH 1221/2290] ext4: correct best extent lstart adjustment logic [ Upstream commit 4fbf8bc733d14bceb16dda46a3f5e19c6a9621c5 ] When yangerkun review commit 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()"), it was found that the best extent did not completely cover the original request after adjusting the best extent lstart in ext4_mb_new_inode_pa() as follows: original request: 2/10(8) normalized request: 0/64(64) best extent: 0/9(9) When we check if best ex can be kept at start of goal, ac_o_ex.fe_logical is 2 less than the adjusted best extent logical end 9, so we think the adjustment is done. But obviously 0/9(9) doesn't cover 2/10(8), so we should determine here if the original request logical end is less than or equal to the adjusted best extent logical end. In addition, add a comment stating when adjusted best_ex will not cover the original request, and remove the duplicate assertion because adjusting lstart makes no change to b_ex.fe_len. Link: https://lore.kernel.org/r/3630fa7f-b432-7afd-5f79-781bc3b2c5ea@huawei.com Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()") Cc: Signed-off-by: yangerkun Signed-off-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20240201141845.1879253-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6a3e27771df73..bc0ca45a5d817 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4684,10 +4684,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) .fe_len = ac->ac_g_ex.fe_len, }; loff_t orig_goal_end = extent_logical_end(sbi, &ex); + loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex); - /* we can't allocate as much as normalizer wants. - * so, found space must get proper lstart - * to cover original request */ + /* + * We can't allocate as much as normalizer wants, so we try + * to get proper lstart to cover the original request, except + * when the goal doesn't cover the original request as below: + * + * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048 + * best_ex:0/200(200) -> adjusted: 1848/2048(200) + */ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); @@ -4699,7 +4705,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) * 1. Check if best ex can be kept at end of goal and still * cover original start * 2. Else, check if best ex can be kept at start of goal and - * still cover original start + * still cover original end * 3. Else, keep the best ex at start of original request. */ ex.fe_len = ac->ac_b_ex.fe_len; @@ -4709,7 +4715,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) goto adjust_bex; ex.fe_logical = ac->ac_g_ex.fe_logical; - if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) + if (o_ex_end <= extent_logical_end(sbi, &ex)) goto adjust_bex; ex.fe_logical = ac->ac_o_ex.fe_logical; @@ -4717,7 +4723,6 @@ adjust_bex: ac->ac_b_ex.fe_logical = ex.fe_logical; BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); - BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); } -- GitLab From 5ea241b1931f368096856d4d8c6f653392370b17 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 22 Feb 2024 22:17:23 +0900 Subject: [PATCH 1222/2290] block: Clear zone limits for a non-zoned stacked queue [ Upstream commit c8f6f88d25929ad2f290b428efcae3b526f3eab0 ] Device mapper may create a non-zoned mapped device out of a zoned device (e.g., the dm-zoned target). In such case, some queue limit such as the max_zone_append_sectors and zone_write_granularity endup being non zero values for a block device that is not zoned. Avoid this by clearing these limits in blk_stack_limits() when the stacked zoned limit is false. Fixes: 3093a479727b ("block: inherit the zoned characteristics in blk_stack_limits") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240222131724.1803520-1-dlemoal@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-settings.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index bbca4ce77a2d3..c702f408bbc0a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -680,6 +680,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); t->zoned = max(t->zoned, b->zoned); + if (!t->zoned) { + t->zone_write_granularity = 0; + t->max_zone_append_sectors = 0; + } return ret; } EXPORT_SYMBOL(blk_stack_limits); -- GitLab From 03b821647b73b4fd6701b4b4e8936e42aa435959 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:15:52 +0100 Subject: [PATCH 1223/2290] kasan/test: avoid gcc warning for intentional overflow [ Upstream commit e10aea105e9ed14b62a11844fec6aaa87c6935a3 ] The out-of-bounds test allocates an object that is three bytes too short in order to validate the bounds checking. Starting with gcc-14, this causes a compile-time warning as gcc has grown smart enough to understand the sizeof() logic: mm/kasan/kasan_test.c: In function 'kmalloc_oob_16': mm/kasan/kasan_test.c:443:14: error: allocation of insufficient size '13' for type 'struct ' with size '16' [-Werror=alloc-size] 443 | ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); | ^ Hide the actual computation behind a RELOC_HIDE() that ensures the compiler misses the intentional bug. Link: https://lkml.kernel.org/r/20240212111609.869266-1-arnd@kernel.org Fixes: 3f15801cdc23 ("lib: add kasan test module") Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Dmitry Vyukov Cc: Marco Elver Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/kasan/kasan_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 0d59098f08761..cef683a2e0d2e 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -415,7 +415,8 @@ static void kmalloc_oob_16(struct kunit *test) /* This test is specifically crafted for the generic mode. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + /* RELOC_HIDE to prevent gcc from warning about short alloc */ + ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); -- GitLab From 428ca0000f0abd5c99354c52a36becf2b815ca21 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 10 Oct 2023 15:55:49 +0100 Subject: [PATCH 1224/2290] bounds: support non-power-of-two CONFIG_NR_CPUS [ Upstream commit f2d5dcb48f7ba9e3ff249d58fc1fa963d374e66a ] ilog2() rounds down, so for example when PowerPC 85xx sets CONFIG_NR_CPUS to 24, we will only allocate 4 bits to store the number of CPUs instead of 5. Use bits_per() instead, which rounds up. Found by code inspection. The effect of this would probably be a misaccounting when doing NUMA balancing, so to a user, it would only be a performance penalty. The effects may be more wide-spread; it's hard to tell. Link: https://lkml.kernel.org/r/20231010145549.1244748-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Fixes: 90572890d202 ("mm: numa: Change page last {nid,pid} into {cpu,pid}") Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index b529182e8b04f..c5a9fcd2d6228 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN -- GitLab From c8cc05de8e6b5612b6e9f92c385c1a064b0db375 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 5 Feb 2024 13:26:26 +0100 Subject: [PATCH 1225/2290] fat: fix uninitialized field in nostale filehandles [ Upstream commit fde2497d2bc3a063d8af88b258dbadc86bd7b57c ] When fat_encode_fh_nostale() encodes file handle without a parent it stores only first 10 bytes of the file handle. However the length of the file handle must be a multiple of 4 so the file handle is actually 12 bytes long and the last two bytes remain uninitialized. This is not great at we potentially leak uninitialized information with the handle to userspace. Properly initialize the full handle length. Link: https://lkml.kernel.org/r/20240205122626.13701-1-jack@suse.cz Reported-by: syzbot+3ce5dea5b1539ff36769@syzkaller.appspotmail.com Fixes: ea3983ace6b7 ("fat: restructure export_operations") Signed-off-by: Jan Kara Acked-by: OGAWA Hirofumi Cc: Amir Goldstein Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/fat/nfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index af191371c3529..bab63eeaf9cbc 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -130,6 +130,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, fid->parent_i_gen = parent->i_generation; type = FILEID_FAT_WITH_PARENT; *lenp = FAT_FID_SIZE_WITH_PARENT; + } else { + /* + * We need to initialize this field because the fh is actually + * 12 bytes long + */ + fid->parent_i_pos_hi = 0; } return type; -- GitLab From 142d87c958d9454c3cffa625fab56f3016e8f9f3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 24 Jan 2024 17:52:44 +0000 Subject: [PATCH 1226/2290] ubifs: Set page uptodate in the correct place [ Upstream commit 723012cab779eee8228376754e22c6594229bf8f ] Page cache reads are lockless, so setting the freshly allocated page uptodate before we've overwritten it with the data it's supposed to have in it will allow a simultaneous reader to see old data. Move the call to SetPageUptodate into ubifs_write_end(), which is after we copied the new data into the page. Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- fs/ubifs/file.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 10c1779af9c51..f7b1f9ece1364 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -261,9 +261,6 @@ static int write_begin_slow(struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } if (PagePrivate(page)) @@ -462,9 +459,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } err = allocate_budget(c, page, ui, appending); @@ -474,10 +468,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * If we skipped reading the page because we were going to * write all of it, then it is not up to date. */ - if (skipped_read) { + if (skipped_read) ClearPageChecked(page); - ClearPageUptodate(page); - } /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -568,6 +560,9 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, goto out; } + if (len == PAGE_SIZE) + SetPageUptodate(page); + if (!PagePrivate(page)) { attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); -- GitLab From 5ecbc7465f66bb7eb2be2cd423f84000872e3560 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 24 Jan 2024 07:37:02 +0100 Subject: [PATCH 1227/2290] ubi: Check for too small LEB size in VTBL code [ Upstream commit 68a24aba7c593eafa8fd00f2f76407b9b32b47a9 ] If the LEB size is smaller than a volume table record we cannot have volumes. In this case abort attaching. Cc: Chenyuan Yang Cc: stable@vger.kernel.org Fixes: 801c135ce73d ("UBI: Unsorted Block Images") Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-mtd/1433EB7A-FC89-47D6-8F47-23BE41B263B3@illinois.edu/ Signed-off-by: Richard Weinberger Reviewed-by: Zhihao Cheng Signed-off-by: Sasha Levin --- drivers/mtd/ubi/vtbl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index f700f0e4f2ec4..6e5489e233dd2 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -791,6 +791,12 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) * The number of supported volumes is limited by the eraseblock size * and by the UBI_MAX_VOLUMES constant. */ + + if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) { + ubi_err(ubi, "LEB size too small for a volume record"); + return -EINVAL; + } + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; if (ubi->vtbl_slots > UBI_MAX_VOLUMES) ubi->vtbl_slots = UBI_MAX_VOLUMES; -- GitLab From 5d1442eeb93689902b8fea2efe041d3054814c86 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 20 Feb 2024 10:49:03 +0800 Subject: [PATCH 1228/2290] ubi: correct the calculation of fastmap size [ Upstream commit 7f174ae4f39e8475adcc09d26c5a43394689ad6c ] Now that the calculation of fastmap size in ubi_calc_fm_size() is incorrect since it miss each user volume's ubi_fm_eba structure and the Internal UBI volume info. Let's correct the calculation. Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- drivers/mtd/ubi/fastmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index ca2d9efe62c3c..1060e19205d2a 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -85,9 +85,10 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi) sizeof(struct ubi_fm_scan_pool) + sizeof(struct ubi_fm_scan_pool) + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + - (sizeof(struct ubi_fm_eba) + - (ubi->peb_count * sizeof(__be32))) + - sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + ((sizeof(struct ubi_fm_eba) + + sizeof(struct ubi_fm_volhdr)) * + (UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT)) + + (ubi->peb_count * sizeof(__be32)); return roundup(size, ubi->leb_size); } -- GitLab From ffbfea10d9c98c98e630e658407da52f341d3ff0 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sun, 11 Feb 2024 00:45:51 +0300 Subject: [PATCH 1229/2290] mtd: rawnand: meson: fix scrambling mode value in command macro [ Upstream commit ef6f463599e16924cdd02ce5056ab52879dc008c ] Scrambling mode is enabled by value (1 << 19). NFC_CMD_SCRAMBLER_ENABLE is already (1 << 19), so there is no need to shift it again in CMDRWGEN macro. Signed-off-by: Arseniy Krasnov Cc: Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240210214551.441610-1-avkrasnov@salutedevices.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/meson_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 0aeac8ccbd0ee..05925fb694602 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -63,7 +63,7 @@ #define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages) \ ( \ (cmd_dir) | \ - ((ran) << 19) | \ + (ran) | \ ((bch) << 14) | \ ((short_mode) << 13) | \ (((page_size) & 0x7f) << 6) | \ -- GitLab From 10857a2412fc123bb6d868698ace39e6b2ffb9fe Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 16 Feb 2024 14:26:55 +0100 Subject: [PATCH 1230/2290] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd() [ Upstream commit e5db6a74571a8baf87a116ea39aab946283362ff ] Convert to use real temp variables instead of clobbering processor registers. This aligns the 64-bit inline assembly code with the 32-bit assembly code which was rewritten with commit 427c1073a2a1 ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). While at it, fix comment in 32-bit rewrite code. Temporary variables are now used for both 32-bit and 64-bit code, so move their declarations to the function header. No functional change intended. Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/unaligned.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 8a8e7d7224a26..782ee05e20889 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -167,6 +167,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; + unsigned long shift, temp1; __u64 val = 0; ASM_EXCEPTIONTABLE_VAR(ret); @@ -178,25 +179,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) #ifdef CONFIG_64BIT __asm__ __volatile__ ( -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ -" mtsp %4, %%sr1\n" -" depd %%r0,63,3,%3\n" -"1: ldd 0(%%sr1,%3),%0\n" -"2: ldd 8(%%sr1,%3),%%r20\n" -" subi 64,%%r19,%%r19\n" -" mtsar %%r19\n" -" shrpd %0,%%r20,%%sar,%0\n" +" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */ +" mtsp %5, %%sr1\n" +" depd %%r0,63,3,%2\n" +"1: ldd 0(%%sr1,%2),%0\n" +"2: ldd 8(%%sr1,%2),%4\n" +" subi 64,%3,%3\n" +" mtsar %3\n" +" shrpd %0,%4,%%sar,%0\n" "3: \n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") - : "=r" (val), "+r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20" ); + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) + : "r" (regs->isr) ); #else - { - unsigned long shift, temp1; __asm__ __volatile__ ( -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */ " mtsp %5, %%sr1\n" " dep %%r0,31,2,%2\n" "1: ldw 0(%%sr1,%2),%0\n" @@ -212,7 +210,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); - } #endif DPRINTF("val = 0x%llx\n", val); -- GitLab From 6eb684e9c0d0b83ca8b232456258c28ebb6c53f1 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 23 Feb 2024 16:40:51 +0100 Subject: [PATCH 1231/2290] parisc: Avoid clobbering the C/B bits in the PSW with tophys and tovirt macros [ Upstream commit 4603fbaa76b5e703b38ac8cc718102834eb6e330 ] Use add,l to avoid clobbering the C/B bits in the PSW. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Sasha Levin --- arch/parisc/include/asm/assembly.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 5937d5edaba1e..000a28e1c5e8d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -97,26 +97,28 @@ * version takes two arguments: a src and destination register. * However, the source and destination registers can not be * the same register. + * + * We use add,l to avoid clobbering the C/B bits in the PSW. */ .macro tophys grvirt, grphys - ldil L%(__PAGE_OFFSET), \grphys - sub \grvirt, \grphys, \grphys + ldil L%(-__PAGE_OFFSET), \grphys + addl \grvirt, \grphys, \grphys .endm - + .macro tovirt grphys, grvirt ldil L%(__PAGE_OFFSET), \grvirt - add \grphys, \grvirt, \grvirt + addl \grphys, \grvirt, \grvirt .endm .macro tophys_r1 gr - ldil L%(__PAGE_OFFSET), %r1 - sub \gr, %r1, \gr + ldil L%(-__PAGE_OFFSET), %r1 + addl \gr, %r1, \gr .endm - + .macro tovirt_r1 gr ldil L%(__PAGE_OFFSET), %r1 - add \gr, %r1, \gr + addl \gr, %r1, \gr .endm .macro delay value -- GitLab From 3b64d68d90f5ebb90be5423215ea83126fcd46bb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Feb 2024 09:55:26 -0800 Subject: [PATCH 1232/2290] parisc: Fix ip_fast_csum [ Upstream commit a2abae8f0b638c31bb9799d9dd847306e0d005bd ] IP checksum unit tests report the following error when run on hppa/hppa64. # test_ip_fast_csum: ASSERTION FAILED at lib/checksum_kunit.c:463 Expected ( u64)csum_result == ( u64)expected, but ( u64)csum_result == 33754 (0x83da) ( u64)expected == 10946 (0x2ac2) not ok 4 test_ip_fast_csum 0x83da is the expected result if the IP header length is 20 bytes. 0x2ac2 is the expected result if the IP header length is 24 bytes. The test fails with an IP header length of 24 bytes. It appears that ip_fast_csum() always returns the checksum for a 20-byte header, no matter how long the header actually is. Code analysis shows a suspicious assembler sequence in ip_fast_csum(). " addc %0, %3, %0\n" "1: ldws,ma 4(%1), %3\n" " addib,< 0, %2, 1b\n" <--- While my understanding of HPPA assembler is limited, it does not seem to make much sense to subtract 0 from a register and to expect the result to ever be negative. Subtracting 1 from the length parameter makes more sense. On top of that, the operation should be repeated if and only if the result is still > 0, so change the suspicious instruction to " addib,> -1, %2, 1b\n" The IP checksum unit test passes after this change. Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index 3c43baca7b397..f705e5dd10742 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -40,7 +40,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) " addc %0, %5, %0\n" " addc %0, %3, %0\n" "1: ldws,ma 4(%1), %3\n" -" addib,< 0, %2, 1b\n" +" addib,> -1, %2, 1b\n" " addc %0, %3, %0\n" "\n" " extru %0, 31, 16, %4\n" -- GitLab From a5d32783a5665a1c63eb9d2bebb7f9001183e9ed Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Feb 2024 11:15:56 -0800 Subject: [PATCH 1233/2290] parisc: Fix csum_ipv6_magic on 32-bit systems [ Upstream commit 4408ba75e4ba80c91fde7e10bccccf388f5c09be ] Calculating the IPv6 checksum on 32-bit systems missed overflows when adding the proto+len fields into the checksum. This results in the following unit test failure. # test_csum_ipv6_magic: ASSERTION FAILED at lib/checksum_kunit.c:506 Expected ( u64)csum_result == ( u64)expected, but ( u64)csum_result == 46722 (0xb682) ( u64)expected == 46721 (0xb681) not ok 5 test_csum_ipv6_magic This is probably rarely seen in the real world because proto+len are usually small values which will rarely result in overflows when calculating the checksum. However, the unit test code uses large values for the length field, causing the test to fail. Fix the problem by adding the missing carry into the final checksum. Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index f705e5dd10742..e619e67440db9 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -163,7 +163,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " ldw,ma 4(%2), %7\n" /* 4th daddr */ " addc %6, %0, %0\n" " addc %7, %0, %0\n" -" addc %3, %0, %0\n" /* fold in proto+len, catch carry */ +" addc %3, %0, %0\n" /* fold in proto+len */ +" addc 0, %0, %0\n" /* add carry */ #endif : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len), -- GitLab From 053bb9aab73dd2f39ae26e85c35d3e11f4c6ea82 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 13 Feb 2024 15:46:31 -0800 Subject: [PATCH 1234/2290] parisc: Fix csum_ipv6_magic on 64-bit systems [ Upstream commit 4b75b12d70506e31fc02356bbca60f8d5ca012d0 ] hppa 64-bit systems calculates the IPv6 checksum using 64-bit add operations. The last add folds protocol and length fields into the 64-bit result. While unlikely, this operation can overflow. The overflow can be triggered with a code sequence such as the following. /* try to trigger massive overflows */ memset(tmp_buf, 0xff, sizeof(struct in6_addr)); csum_result = csum_ipv6_magic((struct in6_addr *)tmp_buf, (struct in6_addr *)tmp_buf, 0xffff, 0xff, 0xffffffff); Fix the problem by adding any overflows from the final add operation into the calculated checksum. Fortunately, we can do this without additional cost by replacing the add operation used to fold the checksum into 32 bit with "add,dc" to add in the missing carry. Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Reviewed-by: Charlie Jenkins Tested-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index e619e67440db9..c949aa20fa162 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -137,8 +137,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */ " extrd,u %0, 31, 32, %4\n"/* copy upper half down */ " depdi 0, 31, 32, %0\n"/* clear upper half */ -" add %4, %0, %0\n" /* fold into 32-bits */ -" addc 0, %0, %0\n" /* add carry */ +" add,dc %4, %0, %0\n" /* fold into 32-bits, plus carry */ +" addc 0, %0, %0\n" /* add final carry */ #else -- GitLab From ceffd026f851a86a0bf6f1a9a11c514eb7803e4a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 27 Feb 2024 12:33:51 -0800 Subject: [PATCH 1235/2290] parisc: Strip upper 32 bit of sum in csum_ipv6_magic for 64-bit builds [ Upstream commit 0568b6f0d863643db2edcc7be31165740c89fa82 ] IPv6 checksum tests with unaligned addresses on 64-bit builds result in unexpected failures. Expected expected == csum_result, but expected == 46591 (0xb5ff) csum_result == 46381 (0xb52d) with alignment offset 1 Oddly enough, the problem disappeared after adding test code into the beginning of csum_ipv6_magic(). As it turns out, the 'sum' parameter of csum_ipv6_magic() is declared as __wsum, which is a 32-bit variable. However, it is treated as 64-bit variable in the 64-bit assembler code. Tests showed that the upper 32 bit of the register used to pass the variable are _not_ cleared when entering the function. This can result in checksum calculation errors. Clearing the upper 32 bit of 'sum' as first operation in the assembler code fixes the problem. Acked-by: Helge Deller Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/checksum.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index c949aa20fa162..2aceebcd695c8 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -126,6 +126,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, ** Try to keep 4 registers with "live" values ahead of the ALU. */ +" depdi 0, 31, 32, %0\n"/* clear upper half of incoming checksum */ " ldd,ma 8(%1), %4\n" /* get 1st saddr word */ " ldd,ma 8(%2), %5\n" /* get 1st daddr word */ " add %4, %0, %0\n" -- GitLab From 9477cfeb300823461b44223a7d5fac26a31df4fe Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 12 Jan 2024 15:10:17 +0800 Subject: [PATCH 1236/2290] md/raid5: fix atomicity violation in raid5_cache_count [ Upstream commit dfd2bf436709b2bccb78c2dda550dde93700efa7 ] In raid5_cache_count(): if (conf->max_nr_stripes < conf->min_nr_stripes) return 0; return conf->max_nr_stripes - conf->min_nr_stripes; The current check is ineffective, as the values could change immediately after being checked. In raid5_set_cache_size(): ... conf->min_nr_stripes = size; ... while (size > conf->max_nr_stripes) conf->min_nr_stripes = conf->max_nr_stripes; ... Due to intermediate value updates in raid5_set_cache_size(), concurrent execution of raid5_cache_count() and raid5_set_cache_size() may lead to inconsistent reads of conf->max_nr_stripes and conf->min_nr_stripes. The current checks are ineffective as values could change immediately after being checked, raising the risk of conf->min_nr_stripes exceeding conf->max_nr_stripes and potentially causing an integer overflow. This possible bug is found by an experimental static analysis tool developed by our team. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 6.2. To resolve this issue, it is suggested to introduce local variables 'min_stripes' and 'max_stripes' in raid5_cache_count() to ensure the values remain stable throughout the check. Adding locks in raid5_cache_count() fails to resolve atomicity violations, as raid5_set_cache_size() may hold intermediate values of conf->min_nr_stripes while unlocked. With this patch applied, our tool no longer reports the bug, with the kernel configuration allyesconfig for x86_64. Due to the lack of associated hardware, we cannot test the patch in runtime testing, and just verify it according to the code logic. Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240112071017.16313-1-2045gemini@gmail.com Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid5.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e4564ca1f2434..8cf2317857e0a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2420,7 +2420,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) atomic_inc(&conf->active_stripes); raid5_release_stripe(sh); - conf->max_nr_stripes++; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1); return 1; } @@ -2717,7 +2717,7 @@ static int drop_one_stripe(struct r5conf *conf) shrink_buffers(sh); free_stripe(conf->slab_cache, sh); atomic_dec(&conf->active_stripes); - conf->max_nr_stripes--; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1); return 1; } @@ -6891,7 +6891,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) if (size <= 16 || size > 32768) return -EINVAL; - conf->min_nr_stripes = size; + WRITE_ONCE(conf->min_nr_stripes, size); mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) @@ -6903,7 +6903,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) { - conf->min_nr_stripes = conf->max_nr_stripes; + WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes); result = -ENOMEM; break; } @@ -7468,11 +7468,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); + int max_stripes = READ_ONCE(conf->max_nr_stripes); + int min_stripes = READ_ONCE(conf->min_nr_stripes); - if (conf->max_nr_stripes < conf->min_nr_stripes) + if (max_stripes < min_stripes) /* unlikely, but not impossible */ return 0; - return conf->max_nr_stripes - conf->min_nr_stripes; + return max_stripes - min_stripes; } static struct r5conf *setup_conf(struct mddev *mddev) -- GitLab From 56452891e262fdbcc07a4d7d66deef7c0e444f0c Mon Sep 17 00:00:00 2001 From: Shivnandan Kumar Date: Tue, 27 Feb 2024 14:43:51 +0530 Subject: [PATCH 1237/2290] cpufreq: Limit resolving a frequency to policy min/max [ Upstream commit d394abcb12bb1a6f309c1221fdb8e73594ecf1b4 ] Resolving a frequency to an efficient one should not transgress policy->max (which can be set for thermal reason) and policy->min. Currently, there is possibility where scaling_cur_freq can exceed scaling_max_freq when scaling_max_freq is an inefficient frequency. Add a check to ensure that resolving a frequency will respect policy->min/max. Cc: All applicable Fixes: 1f39fa0dccff ("cpufreq: Introducing CPUFREQ_RELATION_E") Signed-off-by: Shivnandan Kumar [ rjw: Whitespace adjustment, changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- include/linux/cpufreq.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e53..9d208648c84d5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1023,6 +1023,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1056,7 +1068,8 @@ retry: return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } -- GitLab From a7b6523f92001a538b74edf85f1384cab4fada5a Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Thu, 29 Feb 2024 12:14:59 +0530 Subject: [PATCH 1238/2290] PM: suspend: Set mem_sleep_current during kernel command line setup [ Upstream commit 9bc4ffd32ef8943f5c5a42c9637cfd04771d021b ] psci_init_system_suspend() invokes suspend_set_ops() very early during bootup even before kernel command line for mem_sleep_default is setup. This leads to kernel command line mem_sleep_default=s2idle not working as mem_sleep_current gets changed to deep via suspend_set_ops() and never changes back to s2idle. Set mem_sleep_current along with mem_sleep_default during kernel command line setup as default suspend mode. Fixes: faf7ec4a92c0 ("drivers: firmware: psci: add system suspend support") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Maulik Shah Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/power/suspend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index fa3bf161d13f7..a718067deecee 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -192,6 +192,7 @@ static int __init mem_sleep_default_setup(char *str) if (mem_sleep_labels[state] && !strcmp(str, mem_sleep_labels[state])) { mem_sleep_default = state; + mem_sleep_current = state; break; } -- GitLab From 852db52b45ea96dac2720f108e7c7331cd3738bb Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:47 +0100 Subject: [PATCH 1239/2290] clk: qcom: gcc-ipq6018: fix terminating of frequency table arrays [ Upstream commit cdbc6e2d8108bc47895e5a901cfcaf799b00ca8d ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: d9db07f088af ("clk: qcom: Add ipq6018 Global Clock Controller support") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-2-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq6018.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 4c5c7a8f41d08..b9844e41cf99d 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -1557,6 +1557,7 @@ static struct clk_regmap_div nss_ubi0_div_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(24000000, P_XO, 1, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_core_pi_sleep_clk[] = { @@ -1737,6 +1738,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(160000000, P_GPLL0, 5, 0, 0), F(216000000, P_GPLL6, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_gpll6_gpll0_div2[] = { -- GitLab From dd92b159c506804ac57adf3742d9728298bb1255 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:48 +0100 Subject: [PATCH 1240/2290] clk: qcom: gcc-ipq8074: fix terminating of frequency table arrays [ Upstream commit 1040ef5ed95d6fd2628bad387d78a61633e09429 ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: 9607f6224b39 ("clk: qcom: ipq8074: add PCIE, USB and SDCC clocks") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-3-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq8074.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index b2e83b38976e5..b52c923a2fbca 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -973,6 +973,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + { } }; static struct clk_rcg2 pcie0_aux_clk_src = { @@ -1078,6 +1079,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(160000000, P_GPLL0, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static struct clk_rcg2 sdcc1_ice_core_clk_src = { -- GitLab From 185de0b7cdeaad8b89ebd4c8a258ff2f21adba99 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:51 +0100 Subject: [PATCH 1241/2290] clk: qcom: mmcc-apq8084: fix terminating of frequency table arrays [ Upstream commit a903cfd38d8dee7e754fb89fd1bebed99e28003d ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: 2b46cd23a5a2 ("clk: qcom: Add APQ8084 Multimedia Clock Controller (MMCC) support") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-6-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/mmcc-apq8084.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/mmcc-apq8084.c b/drivers/clk/qcom/mmcc-apq8084.c index e9f9713591558..5f373c10ec6ee 100644 --- a/drivers/clk/qcom/mmcc-apq8084.c +++ b/drivers/clk/qcom/mmcc-apq8084.c @@ -334,6 +334,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(333430000, P_MMPLL1, 3.5, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -358,6 +359,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(228570000, P_MMPLL0, 3.5, 0, 0), F(320000000, P_MMPLL0, 2.5, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { -- GitLab From 537040c257ab4cd0673fbae048f3940c8ea2e589 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 29 Feb 2024 19:07:52 +0100 Subject: [PATCH 1242/2290] clk: qcom: mmcc-msm8974: fix terminating of frequency table arrays [ Upstream commit e2c02a85bf53ae86d79b5fccf0a75ac0b78e0c96 ] The frequency table arrays are supposed to be terminated with an empty element. Add such entry to the end of the arrays where it is missing in order to avoid possible out-of-bound access when the table is traversed by functions like qcom_find_freq() or qcom_find_freq_floor(). Only compile tested. Fixes: d8b212014e69 ("clk: qcom: Add support for MSM8974's multimedia clock controller (MMCC)") Signed-off-by: Gabor Juhos Reviewed-by: Stephen Boyd Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240229-freq-table-terminator-v1-7-074334f0905c@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/mmcc-msm8974.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c index 17ed52046170a..eb2b0e2200d23 100644 --- a/drivers/clk/qcom/mmcc-msm8974.c +++ b/drivers/clk/qcom/mmcc-msm8974.c @@ -279,6 +279,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -303,6 +304,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { -- GitLab From b2c898469dfc388f619c6c972a28466cbb1442ea Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Thu, 29 Feb 2024 16:14:38 +0200 Subject: [PATCH 1243/2290] usb: xhci: Add error handling in xhci_map_urb_for_dma [ Upstream commit be95cc6d71dfd0cba66e3621c65413321b398052 ] Currently xhci_map_urb_for_dma() creates a temporary buffer and copies the SG list to the new linear buffer. But if the kzalloc_node() fails, then the following sg_pcopy_to_buffer() can lead to crash since it tries to memcpy to NULL pointer. So return -ENOMEM if kzalloc returns null pointer. Cc: stable@vger.kernel.org # 5.11 Fixes: 2017a1e58472 ("usb: xhci: Use temporary buffer to consolidate SG") Signed-off-by: Prashanth K Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240229141438.619372-10-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c02ad4f76bb3c..565aba6b99860 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1334,6 +1334,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb) temp = kzalloc_node(buf_len, GFP_ATOMIC, dev_to_node(hcd->self.sysdev)); + if (!temp) + return -ENOMEM; if (usb_urb_dir_out(urb)) sg_pcopy_to_buffer(urb->sg, urb->num_sgs, -- GitLab From 33a4aa08d5f0a816323510b40924f4bd90e28e64 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 29 Feb 2024 23:25:19 +1100 Subject: [PATCH 1244/2290] powerpc/fsl: Fix mfpmr build errors with newer binutils [ Upstream commit 5f491356b7149564ab22323ccce79c8d595bfd0c ] Binutils 2.38 complains about the use of mfpmr when building ppc6xx_defconfig: CC arch/powerpc/kernel/pmc.o {standard input}: Assembler messages: {standard input}:45: Error: unrecognized opcode: `mfpmr' {standard input}:56: Error: unrecognized opcode: `mtpmr' This is because by default the kernel is built with -mcpu=powerpc, and the mt/mfpmr instructions are not defined. It can be avoided by enabling CONFIG_E300C3_CPU, but just adding that to the defconfig will leave open the possibility of randconfig failures. So add machine directives around the mt/mfpmr instructions to tell binutils how to assemble them. Cc: stable@vger.kernel.org Reported-by: Jan-Benedict Glaw Signed-off-by: Michael Ellerman Link: https://msgid.link/20240229122521.762431-3-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/reg_fsl_emb.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index a21f529c43d96..8359c06d92d9f 100644 --- a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -12,9 +12,16 @@ #ifndef __ASSEMBLY__ /* Performance Monitor Registers */ #define mfpmr(rn) ({unsigned int rval; \ - asm volatile("mfpmr %0," __stringify(rn) \ + asm volatile(".machine push; " \ + ".machine e300; " \ + "mfpmr %0," __stringify(rn) ";" \ + ".machine pop; " \ : "=r" (rval)); rval;}) -#define mtpmr(rn, v) asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v)) +#define mtpmr(rn, v) asm volatile(".machine push; " \ + ".machine e300; " \ + "mtpmr " __stringify(rn) ",%0; " \ + ".machine pop; " \ + : : "r" (v)) #endif /* __ASSEMBLY__ */ /* Freescale Book E Performance Monitor APU Registers */ -- GitLab From a87209645494907009ef93f71bcca9b88a24183f Mon Sep 17 00:00:00 2001 From: Daniel Vogelbacher Date: Sun, 11 Feb 2024 15:42:46 +0100 Subject: [PATCH 1245/2290] USB: serial: ftdi_sio: add support for GMC Z216C Adapter IR-USB [ Upstream commit 3fb7bc4f3a98c48981318b87cf553c5f115fd5ca ] The GMC IR-USB adapter cable utilizes a FTDI FT232R chip. Add VID/PID for this adapter so it can be used as serial device via ftdi_sio. Signed-off-by: Daniel Vogelbacher Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index fe2173e37b061..248cbc9c48fd1 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1077,6 +1077,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + /* GMC devices */ + { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 21a2b5a25fc09..5ee60ba2a73cd 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1606,3 +1606,9 @@ #define UBLOX_VID 0x1546 #define UBLOX_C099F9P_ZED_PID 0x0502 #define UBLOX_C099F9P_ODIN_PID 0x0503 + +/* + * GMC devices + */ +#define GMC_VID 0x1cd7 +#define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */ -- GitLab From b67095647f5bf6e4a198a5fd31dfe14b40da6cf4 Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Tue, 13 Feb 2024 21:53:29 +0000 Subject: [PATCH 1246/2290] USB: serial: add device ID for VeriFone adapter [ Upstream commit cda704809797a8a86284f9df3eef5e62ec8a3175 ] Add device ID for a (probably fake) CP2102 UART device. lsusb -v output: Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 1.10 bDeviceClass 0 [unknown] bDeviceSubClass 0 [unknown] bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x11ca VeriFone Inc idProduct 0x0212 Verifone USB to Printer bcdDevice 1.00 iManufacturer 1 Silicon Labs iProduct 2 Verifone USB to Printer iSerial 3 0001 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0020 bNumInterfaces 1 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 100mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 [unknown] bInterfaceProtocol 0 iInterface 2 Verifone USB to Printer Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Device Status: 0x0000 (Bus Powered) Signed-off-by: Cameron Williams Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index b3e60b3847941..bd0632e77d8b0 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -177,6 +177,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x11CA, 0x0212) }, /* Verifone USB to Printer (UART, CP2102) */ { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ -- GitLab From d0ab375e730a9ba32b9cd61a184ce8e749653c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20H=C3=A4ggstr=C3=B6m?= Date: Wed, 14 Feb 2024 11:47:29 +0100 Subject: [PATCH 1247/2290] USB: serial: cp210x: add ID for MGP Instruments PDS100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a0d9d868491a362d421521499d98308c8e3a0398 ] The radiation meter has the text MGP Instruments PDS-100G or PDS-100GN produced by Mirion Technologies. Tested by forcing the driver association with echo 10c4 863c > /sys/bus/usb-serial/drivers/cp210x/new_id and then setting the serial port in 115200 8N1 mode. The device announces ID_USB_VENDOR_ENC=Silicon\x20Labs and ID_USB_MODEL_ENC=PDS100 Signed-off-by: Christian Häggström Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index bd0632e77d8b0..e9ee8da8cc296 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -144,6 +144,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ + { USB_DEVICE(0x10C4, 0x863C) }, /* MGP Instruments PDS100 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ -- GitLab From 8093d6e928bd2a59b1a08ac439cf1a72deffea52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Jacobs?= Date: Wed, 31 Jan 2024 18:49:17 +0100 Subject: [PATCH 1248/2290] USB: serial: option: add MeiG Smart SLM320 product MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 46809c51565b83881aede6cdf3b0d25254966a41 ] Update the USB serial option driver to support MeiG Smart SLM320. ID 2dee:4d41 UNISOC UNISOC-8910 T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2dee ProdID=4d41 Rev=00.00 S: Manufacturer=UNISOC S: Product=UNISOC-8910 C: #Ifs= 8 Cfg#= 1 Atr=e0 MxPwr=400mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Tested successfully a PPP LTE connection using If#= 0. Not sure of the purpose of every other serial interfaces. Signed-off-by: Aurélien Jacobs Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c0a0cca65437f..1a3e5a9414f07 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -613,6 +613,11 @@ static void option_instat_callback(struct urb *urb); /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 +/* MeiG Smart Technology products */ +#define MEIGSMART_VENDOR_ID 0x2dee +/* MeiG Smart SLM320 based on UNISOC UIS8910 */ +#define MEIGSMART_PRODUCT_SLM320 0x4d41 + /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -2282,6 +2287,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- GitLab From ed85c3113ad1c623652ab8b0ea0f4a7f5e988f5e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Feb 2024 11:49:16 +0000 Subject: [PATCH 1249/2290] KVM: x86/xen: inject vCPU upcall vector when local APIC is enabled [ Upstream commit 8e62bf2bfa46367e14d0ffdcde5aada08759497c ] Linux guests since commit b1c3497e604d ("x86/xen: Add support for HVMOP_set_evtchn_upcall_vector") in v6.0 onwards will use the per-vCPU upcall vector when it's advertised in the Xen CPUID leaves. This upcall is injected through the guest's local APIC as an MSI, unlike the older system vector which was merely injected by the hypervisor any time the CPU was able to receive an interrupt and the upcall_pending flags is set in its vcpu_info. Effectively, that makes the per-CPU upcall edge triggered instead of level triggered, which results in the upcall being lost if the MSI is delivered when the local APIC is *disabled*. Xen checks the vcpu_info->evtchn_upcall_pending flag when the local APIC for a vCPU is software enabled (in fact, on any write to the SPIV register which doesn't disable the APIC). Do the same in KVM since KVM doesn't provide a way for userspace to intervene and trap accesses to the SPIV register of a local APIC emulated by KVM. Fixes: fde0451be8fb3 ("KVM: x86/xen: Support per-vCPU event channel upcall via local APIC") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240227115648.3104-3-dwmw2@infradead.org Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/lapic.c | 5 ++++- arch/x86/kvm/xen.c | 2 +- arch/x86/kvm/xen.h | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index edcf45e312b99..bfeafe4855528 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -40,6 +40,7 @@ #include "ioapic.h" #include "trace.h" #include "x86.h" +#include "xen.h" #include "cpuid.h" #include "hyperv.h" @@ -338,8 +339,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) } /* Check if there are APF page ready requests pending */ - if (enabled) + if (enabled) { kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); + kvm_xen_sw_enable_lapic(apic->vcpu); + } } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index a58a426e6b1c0..684a39df60d9e 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -314,7 +314,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); } -static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) { struct kvm_lapic_irq irq = { }; int r; diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 532a535a9e99f..500d9593a5a38 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -16,6 +16,7 @@ extern struct static_key_false_deferred kvm_xen_enabled; int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu); void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu); +void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *vcpu); int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); @@ -33,6 +34,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); +static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) +{ + /* + * The local APIC is being enabled. If the per-vCPU upcall vector is + * set and the vCPU's evtchn_upcall_pending flag is set, inject the + * interrupt. + */ + if (static_branch_unlikely(&kvm_xen_enabled.key) && + vcpu->arch.xen.vcpu_info_cache.active && + vcpu->arch.xen.upcall_vector && __kvm_xen_has_interrupt(vcpu)) + kvm_xen_inject_vcpu_vector(vcpu); +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return static_branch_unlikely(&kvm_xen_enabled.key) && @@ -98,6 +112,10 @@ static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) { } +static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu) +{ +} + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { return false; -- GitLab From a43ebdbd39ce56992f5939bd78991f353f212f7a Mon Sep 17 00:00:00 2001 From: Toru Katagiri Date: Tue, 5 Mar 2024 08:46:14 +0900 Subject: [PATCH 1250/2290] USB: serial: cp210x: add pid/vid for TDK NC0110013M and MM0110113M [ Upstream commit b1a8da9ff1395c4879b4bd41e55733d944f3d613 ] TDK NC0110013M and MM0110113M have custom USB IDs for CP210x, so we need to add them to the driver. Signed-off-by: Toru Katagiri Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index e9ee8da8cc296..aa30288c8a8e0 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -56,6 +56,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x04BF, 0x1301) }, /* TDK Corporation NC0110013M - Network Controller */ + { USB_DEVICE(0x04BF, 0x1303) }, /* TDK Corporation MM0110113M - i3 Micro Module */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ -- GitLab From 56a2038d00171bd903206256e30eba4c261505a2 Mon Sep 17 00:00:00 2001 From: Qingliang Li Date: Fri, 1 Mar 2024 17:26:57 +0800 Subject: [PATCH 1251/2290] PM: sleep: wakeirq: fix wake irq warning in system suspend [ Upstream commit e7a7681c859643f3f2476b2a28a494877fd89442 ] When driver uses pm_runtime_force_suspend() as the system suspend callback function and registers the wake irq with reverse enable ordering, the wake irq will be re-enabled when entering system suspend, triggering an 'Unbalanced enable for IRQ xxx' warning. In this scenario, the call sequence during system suspend is as follows: suspend_devices_and_enter() -> dpm_suspend_start() -> dpm_run_callback() -> pm_runtime_force_suspend() -> dev_pm_enable_wake_irq_check() -> dev_pm_enable_wake_irq_complete() -> suspend_enter() -> dpm_suspend_noirq() -> device_wakeup_arm_wake_irqs() -> dev_pm_arm_wake_irq() To fix this issue, complete the setting of WAKE_IRQ_DEDICATED_ENABLED flag in dev_pm_enable_wake_irq_complete() to avoid redundant irq enablement. Fixes: 8527beb12087 ("PM: sleep: wakeirq: fix wake irq arming") Reviewed-by: Dhruva Gole Signed-off-by: Qingliang Li Reviewed-by: Johan Hovold Cc: 5.16+ # 5.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/wakeirq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index afd094dec5ca3..ca0c092ba47fb 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -362,8 +362,10 @@ void dev_pm_enable_wake_irq_complete(struct device *dev) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && - wirq->status & WAKE_IRQ_DEDICATED_REVERSE) + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** -- GitLab From bdba49e46905e9d4c308deaee704cb7b8dc004d5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 5 Mar 2024 11:42:56 +0100 Subject: [PATCH 1252/2290] mmc: tmio: avoid concurrent runs of mmc_request_done() [ Upstream commit e8d1b41e69d72c62865bebe8f441163ec00b3d44 ] With the to-be-fixed commit, the reset_work handler cleared 'host->mrq' outside of the spinlock protected critical section. That leaves a small race window during execution of 'tmio_mmc_reset()' where the done_work handler could grab a pointer to the now invalid 'host->mrq'. Both would use it to call mmc_request_done() causing problems (see link below). However, 'host->mrq' cannot simply be cleared earlier inside the critical section. That would allow new mrqs to come in asynchronously while the actual reset of the controller still needs to be done. So, like 'tmio_mmc_set_ios()', an ERR_PTR is used to prevent new mrqs from coming in but still avoiding concurrency between work handlers. Reported-by: Dirk Behme Closes: https://lore.kernel.org/all/20240220061356.3001761-1-dirk.behme@de.bosch.com/ Fixes: df3ef2d3c92c ("mmc: protect the tmio_mmc driver against a theoretical race") Signed-off-by: Wolfram Sang Tested-by: Dirk Behme Reviewed-by: Dirk Behme Cc: stable@vger.kernel.org # 3.0+ Link: https://lore.kernel.org/r/20240305104423.3177-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/tmio_mmc_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 437048bb80273..5024cae411d3a 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work) else mrq->cmd->error = -ETIMEDOUT; + /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */ + host->mrq = ERR_PTR(-EBUSY); host->cmd = NULL; host->data = NULL; -- GitLab From 970e8c49f2cfe68792c408480f3fbda4b837457c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Feb 2024 16:50:49 +0100 Subject: [PATCH 1253/2290] fuse: fix root lookup with nonzero generation [ Upstream commit 68ca1b49e430f6534d0774a94147a823e3b8b26e ] The root inode has a fixed nodeid and generation (1, 0). Prior to the commit 15db16837a35 ("fuse: fix illegal access to inode with reused nodeid") generation number on lookup was ignored. After this commit lookup with the wrong generation number resulted in the inode being unhashed. This is correct for non-root inodes, but replacing the root inode is wrong and results in weird behavior. Fix by reverting to the old behavior if ignoring the generation for the root inode, but issuing a warning in dmesg. Reported-by: Antonio SJ Musumeci Closes: https://lore.kernel.org/all/CAOQ4uxhek5ytdN8Yz2tNEOg5ea4NkBb4nk0FGPjPk_9nz-VG3g@mail.gmail.com/ Fixes: 15db16837a35 ("fuse: fix illegal access to inode with reused nodeid") Cc: # v5.14 Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5e408e7ec4c6b..936a24b646cef 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -399,6 +399,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out_put_forget; if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; + if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) { + pr_warn_once("root generation should be zero\n"); + outarg->generation = 0; + } *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, &outarg->attr, entry_attr_timeout(outarg), -- GitLab From 371f27c2c8fc3f41b4772c51300c954e796d223f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Feb 2024 16:50:49 +0100 Subject: [PATCH 1254/2290] fuse: don't unhash root [ Upstream commit b1fe686a765e6c0d71811d825b5a1585a202b777 ] The root inode is assumed to be always hashed. Do not unhash the root inode even if it is marked BAD. Fixes: 5d069dbe8aaf ("fuse: fix bad inode") Cc: # v5.11 Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/fuse_i.h | 1 - fs/fuse/inode.c | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a9681fecbd91f..253b9b78d6f13 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -923,7 +923,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation, static inline void fuse_make_bad(struct inode *inode) { - remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f81000d968875..367e3b276092f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -445,8 +445,11 @@ retry: } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); - iput(inode); - goto retry; + if (inode != d_inode(sb->s_root)) { + remove_inode_hash(inode); + iput(inode); + goto retry; + } } fi = get_fuse_inode(inode); spin_lock(&fi->lock); -- GitLab From 2496e37ada362d94f709d789666775723e3fe5e0 Mon Sep 17 00:00:00 2001 From: Jameson Thies Date: Tue, 5 Mar 2024 02:58:01 +0000 Subject: [PATCH 1255/2290] usb: typec: ucsi: Clean up UCSI_CABLE_PROP macros [ Upstream commit 4d0a5a9915793377c0fe1a8d78de6bcd92cea963 ] Clean up UCSI_CABLE_PROP macros by fixing a bitmask shifting error for plug type and updating the modal support macro for consistent naming. Fixes: 3cf657f07918 ("usb: typec: ucsi: Remove all bit-fields") Cc: stable@vger.kernel.org Reviewed-by: Benson Leung Reviewed-by: Prashant Malani Reviewed-by: Dmitry Baryshkov Signed-off-by: Jameson Thies Link: https://lore.kernel.org/r/20240305025804.1290919-2-jthies@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/ucsi/ucsi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 60ce9fb6e7450..dbb10cb310d4c 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -220,12 +220,12 @@ struct ucsi_cable_property { #define UCSI_CABLE_PROP_FLAG_VBUS_IN_CABLE BIT(0) #define UCSI_CABLE_PROP_FLAG_ACTIVE_CABLE BIT(1) #define UCSI_CABLE_PROP_FLAG_DIRECTIONALITY BIT(2) -#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) ((_f_) & GENMASK(3, 0)) +#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) (((_f_) & GENMASK(4, 3)) >> 3) #define UCSI_CABLE_PROPERTY_PLUG_TYPE_A 0 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_B 1 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_C 2 #define UCSI_CABLE_PROPERTY_PLUG_OTHER 3 -#define UCSI_CABLE_PROP_MODE_SUPPORT BIT(5) +#define UCSI_CABLE_PROP_FLAG_MODE_SUPPORT BIT(5) u8 latency; } __packed; -- GitLab From b8073c069a843f556469aab45254b099a4d969de Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 4 Mar 2024 13:43:49 -0800 Subject: [PATCH 1256/2290] serial: Lock console when calling into driver before registration [ Upstream commit 801410b26a0e8b8a16f7915b2b55c9528b69ca87 ] During the handoff from earlycon to the real console driver, we have two separate drivers operating on the same device concurrently. In the case of the 8250 driver these concurrent accesses cause problems due to the driver's use of banked registers, controlled by LCR.DLAB. It is possible for the setup(), config_port(), pm() and set_mctrl() callbacks to set DLAB, which can cause the earlycon code that intends to access TX to instead access DLL, leading to missed output and corruption on the serial line due to unintended modifications to the baud rate. In particular, for setup() we have: univ8250_console_setup() -> serial8250_console_setup() -> uart_set_options() -> serial8250_set_termios() -> serial8250_do_set_termios() -> serial8250_do_set_divisor() For config_port() we have: serial8250_config_port() -> autoconfig() For pm() we have: serial8250_pm() -> serial8250_do_pm() -> serial8250_set_sleep() For set_mctrl() we have (for some devices): serial8250_set_mctrl() -> omap8250_set_mctrl() -> __omap8250_set_mctrl() To avoid such problems, let's make it so that the console is locked during pre-registration calls to these callbacks, which will prevent the earlycon driver from running concurrently. Remove the partial solution to this problem in the 8250 driver that locked the console only during autoconfig_irq(), as this would result in a deadlock with the new approach. The console continues to be locked during autoconfig_irq() because it can only be called through uart_configure_port(). Although this patch introduces more locking than strictly necessary (and in particular it also locks during the call to rs485_config() which is not affected by this issue as far as I can tell), it follows the principle that it is the responsibility of the generic console code to manage the earlycon handoff by ensuring that earlycon and real console driver code cannot run concurrently, and not the individual drivers. Signed-off-by: Peter Collingbourne Reviewed-by: John Ogness Link: https://linux-review.googlesource.com/id/I7cf8124dcebf8618e6b2ee543fa5b25532de55d8 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240304214350.501253-1-pcc@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_port.c | 6 ------ drivers/tty/serial/serial_core.c | 12 ++++++++++++ kernel/printk/printk.c | 21 ++++++++++++++++++--- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8efe31448df3c..c744feabd7cdd 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1377,9 +1377,6 @@ static void autoconfig_irq(struct uart_8250_port *up) inb_p(ICP); } - if (uart_console(port)) - console_lock(); - /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); save_mcr = serial8250_in_MCR(up); @@ -1410,9 +1407,6 @@ static void autoconfig_irq(struct uart_8250_port *up) if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); - if (uart_console(port)) - console_unlock(); - port->irq = (irq > 0) ? irq : 0; } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f0ed30d0a697c..fe3f1d655dfe2 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2561,7 +2561,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->type = PORT_UNKNOWN; flags |= UART_CONFIG_TYPE; } + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); port->ops->config_port(port, flags); + if (uart_console(port)) + console_unlock(); } if (port->type != PORT_UNKNOWN) { @@ -2569,6 +2574,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_report_port(drv, port); + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); + /* Power up port for set_mctrl() */ uart_change_pm(state, UART_PM_STATE_ON); @@ -2585,6 +2594,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_rs485_config(port); + if (uart_console(port)) + console_unlock(); + /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 981cdb00b8722..c55ee859dbd08 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3045,6 +3045,21 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +static int console_call_setup(struct console *newcon, char *options) +{ + int err; + + if (!newcon->setup) + return 0; + + /* Synchronize with possible boot console. */ + console_lock(); + err = newcon->setup(newcon, options); + console_unlock(); + + return err; +} + /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected @@ -3080,8 +3095,8 @@ static int try_enable_preferred_console(struct console *newcon, if (_braille_register_console(newcon, c)) return 0; - if (newcon->setup && - (err = newcon->setup(newcon, c->options)) != 0) + err = console_call_setup(newcon, c->options); + if (err) return err; } newcon->flags |= CON_ENABLED; @@ -3107,7 +3122,7 @@ static void try_enable_default_console(struct console *newcon) if (newcon->index < 0) newcon->index = 0; - if (newcon->setup && newcon->setup(newcon, NULL) != 0) + if (console_call_setup(newcon, NULL) != 0) return; newcon->flags |= CON_ENABLED; -- GitLab From c13e7256571fde61836ec133b3c705a7435f47df Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 23 Feb 2024 18:13:38 +1030 Subject: [PATCH 1257/2290] btrfs: qgroup: always free reserved space for extent records [ Upstream commit d139ded8b9cdb897bb9539eb33311daf9a177fd2 ] [BUG] If qgroup is marked inconsistent (e.g. caused by operations needing full subtree rescan, like creating a snapshot and assign to a higher level qgroup), btrfs would immediately start leaking its data reserved space. The following script can easily reproduce it: mkfs.btrfs -O quota -f $dev mount $dev $mnt btrfs subvolume create $mnt/subv1 btrfs qgroup create 1/0 $mnt # This snapshot creation would mark qgroup inconsistent, # as the ownership involves different higher level qgroup, thus # we have to rescan both source and snapshot, which can be very # time consuming, thus here btrfs just choose to mark qgroup # inconsistent, and let users to determine when to do the rescan. btrfs subv snapshot -i 1/0 $mnt/subv1 $mnt/snap1 # Now this write would lead to qgroup rsv leak. xfs_io -f -c "pwrite 0 64k" $mnt/file1 # And at unmount time, btrfs would report 64K DATA rsv space leaked. umount $mnt And we would have the following dmesg output for the unmount: BTRFS info (device dm-1): last unmount of filesystem 14a3d84e-f47b-4f72-b053-a8a36eef74d3 BTRFS warning (device dm-1): qgroup 0/5 has unreleased space, type 0 rsv 65536 [CAUSE] Since commit e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting"), we introduce a mode for btrfs qgroup to skip the timing consuming backref walk, if the qgroup is already inconsistent. But this skip also covered the data reserved freeing, thus the qgroup reserved space for each newly created data extent would not be freed, thus cause the leakage. [FIX] Make the data extent reserved space freeing mandatory. The qgroup reserved space handling is way cheaper compared to the backref walking part, and we always have the super sensitive leak detector, thus it's definitely worth to always free the qgroup reserved data space. Reported-by: Fabian Vogt Fixes: e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting") CC: stable@vger.kernel.org # 6.1+ Link: https://bugzilla.suse.com/show_bug.cgi?id=1216196 Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b3472bf6b288f..c14d4f70e84bd 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2800,11 +2800,6 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) goto cleanup; } - /* Free the reserved data space */ - btrfs_qgroup_free_refroot(fs_info, - record->data_rsv_refroot, - record->data_rsv, - BTRFS_QGROUP_RSV_DATA); /* * Use BTRFS_SEQ_LAST as time_seq to do special search, * which doesn't lock tree or delayed_refs and search @@ -2826,6 +2821,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) record->old_roots = NULL; new_roots = NULL; } + /* Free the reserved data space */ + btrfs_qgroup_free_refroot(fs_info, + record->data_rsv_refroot, + record->data_rsv, + BTRFS_QGROUP_RSV_DATA); cleanup: ulist_free(record->old_roots); ulist_free(new_roots); -- GitLab From 50361c2af7561c6ce25795bddc710fddfc948a2d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 29 Feb 2024 10:37:04 +0000 Subject: [PATCH 1258/2290] btrfs: fix off-by-one chunk length calculation at contains_pending_extent() [ Upstream commit ae6bd7f9b46a29af52ebfac25d395757e2031d0d ] At contains_pending_extent() the value of the end offset of a chunk we found in the device's allocation state io tree is inclusive, so when we calculate the length we pass to the in_range() macro, we must sum 1 to the expression "physical_end - physical_offset". In practice the wrong calculation should be harmless as chunks sizes are never 1 byte and we should never have 1 byte ranges of unallocated space. Nevertheless fix the wrong calculation. Reported-by: Alex Lyakas Link: https://lore.kernel.org/linux-btrfs/CAOcd+r30e-f4R-5x-S7sV22RJPe7+pgwherA6xqN2_qe7o4XTg@mail.gmail.com/ Fixes: 1c11b63eff2a ("btrfs: replace pending/pinned chunks lists with io tree") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fc2d99270c18..03cfb425ea4ea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1444,7 +1444,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, if (in_range(physical_start, *start, len) || in_range(*start, physical_start, - physical_end - physical_start)) { + physical_end + 1 - physical_start)) { *start = physical_end + 1; return true; } -- GitLab From 900b81caf00c89417172afe0e7e49ac4eb110f4b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Mar 2024 11:45:38 +0100 Subject: [PATCH 1259/2290] PCI/PM: Drain runtime-idle callbacks before driver removal [ Upstream commit 9d5286d4e7f68beab450deddbb6a32edd5ecf4bf ] A race condition between the .runtime_idle() callback and the .remove() callback in the rtsx_pcr PCI driver leads to a kernel crash due to an unhandled page fault [1]. The problem is that rtsx_pci_runtime_idle() is not expected to be running after pm_runtime_get_sync() has been called, but the latter doesn't really guarantee that. It only guarantees that the suspend and resume callbacks will not be running when it returns. However, if a .runtime_idle() callback is already running when pm_runtime_get_sync() is called, the latter will notice that the runtime PM status of the device is RPM_ACTIVE and it will return right away without waiting for the former to complete. In fact, it cannot wait for .runtime_idle() to complete because it may be called from that callback (it arguably does not make much sense to do that, but it is not strictly prohibited). Thus in general, whoever is providing a .runtime_idle() callback needs to protect it from running in parallel with whatever code runs after pm_runtime_get_sync(). [Note that .runtime_idle() will not start after pm_runtime_get_sync() has returned, but it may continue running then if it has started earlier.] One way to address that race condition is to call pm_runtime_barrier() after pm_runtime_get_sync() (not before it, because a nonzero value of the runtime PM usage counter is necessary to prevent runtime PM callbacks from being invoked) to wait for the .runtime_idle() callback to complete should it be running at that point. A suitable place for doing that is in pci_device_remove() which calls pm_runtime_get_sync() before removing the driver, so it may as well call pm_runtime_barrier() subsequently, which will prevent the race in question from occurring, not just in the rtsx_pcr driver, but in any PCI drivers providing .runtime_idle() callbacks. Link: https://lore.kernel.org/lkml/20240229062201.49500-1-kai.heng.feng@canonical.com/ # [1] Link: https://lore.kernel.org/r/5761426.DvuYhMxLoT@kreacher Reported-by: Kai-Heng Feng Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Tested-by: Ricky Wu Acked-by: Kai-Heng Feng Cc: Signed-off-by: Sasha Levin --- drivers/pci/pci-driver.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f47a3b10bf504..8dda3b205dfd0 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -473,6 +473,13 @@ static void pci_device_remove(struct device *dev) if (drv->remove) { pm_runtime_get_sync(dev); + /* + * If the driver provides a .runtime_idle() callback and it has + * started to run already, it may continue to run in parallel + * with the code below, so wait until all of the runtime PM + * activity has completed. + */ + pm_runtime_barrier(dev); drv->remove(pci_dev); pm_runtime_put_noidle(dev); } -- GitLab From c9ef367b3e3984721d59252725fd47b4199aaa6c Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 5 Mar 2024 12:30:56 +0100 Subject: [PATCH 1260/2290] PCI/DPC: Quirk PIO log size for Intel Raptor Lake Root Ports [ Upstream commit 627c6db20703b5d18d928464f411d0d4ec327508 ] Commit 5459c0b70467 ("PCI/DPC: Quirk PIO log size for certain Intel Root Ports") and commit 3b8803494a06 ("PCI/DPC: Quirk PIO log size for Intel Ice Lake Root Ports") add quirks for Ice, Tiger and Alder Lake Root Ports. System firmware for Raptor Lake still has the bug, so Linux logs the warning below on several Raptor Lake systems like Dell Precision 3581 with Intel Raptor Lake processor (0W18NX) system firmware/BIOS version 1.10.1. pci 0000:00:07.0: [8086:a76e] type 01 class 0x060400 pci 0000:00:07.0: DPC: RP PIO log size 0 is invalid pci 0000:00:07.1: [8086:a73f] type 01 class 0x060400 pci 0000:00:07.1: DPC: RP PIO log size 0 is invalid Apply the quirk for Raptor Lake Root Ports as well. This also enables the DPC driver to dump the RP PIO Log registers when DPC is triggered. Link: https://lore.kernel.org/r/20240305113057.56468-1-pmenzel@molgen.mpg.de Reported-by: Niels van Aert Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218560 Signed-off-by: Paul Menzel Signed-off-by: Bjorn Helgaas Cc: Cc: Mika Westerberg Cc: Niels van Aert Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c175b70a984c6..289ba6902e41b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6078,6 +6078,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size); #endif /* -- GitLab From aab8a0745f872d8bdfe7552fd721af17e42bda57 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:06 +0800 Subject: [PATCH 1261/2290] dm-raid: fix lockdep waring in "pers->hot_add_disk" [ Upstream commit 95009ae904b1e9dca8db6f649f2d7c18a6e42c75 ] The lockdep assert is added by commit a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") in print_conf(). And I didn't notice that dm-raid is calling "pers->hot_add_disk" without holding 'reconfig_mutex'. "pers->hot_add_disk" read and write many fields that is protected by 'reconfig_mutex', and raid_resume() already grab the lock in other contex. Hence fix this problem by protecting "pers->host_add_disk" with the lock. Fixes: 9092c02d9435 ("DM RAID: Add ability to restore transiently failed devices on resume") Fixes: a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-10-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index bf833ca880bc1..99b4738e867a8 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -4046,7 +4046,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { -- GitLab From f299404fd869ccb0c95572d8ecebfb11ad532e5a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 27 Jan 2024 11:07:43 -0700 Subject: [PATCH 1262/2290] powerpc: xor_vmx: Add '-mhard-float' to CFLAGS [ Upstream commit 35f20786c481d5ced9283ff42de5c69b65e5ed13 ] arch/powerpc/lib/xor_vmx.o is built with '-msoft-float' (from the main powerpc Makefile) and '-maltivec' (from its CFLAGS), which causes an error when building with clang after a recent change in main: error: option '-msoft-float' cannot be specified with '-maltivec' make[6]: *** [scripts/Makefile.build:243: arch/powerpc/lib/xor_vmx.o] Error 1 Explicitly add '-mhard-float' before '-maltivec' in xor_vmx.o's CFLAGS to override the previous inclusion of '-msoft-float' (as the last option wins), which matches how other areas of the kernel use '-maltivec', such as AMDGPU. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1986 Link: https://github.com/llvm/llvm-project/commit/4792f912b232141ecba4cbae538873be3c28556c Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://msgid.link/20240127-ppc-xor_vmx-drop-msoft-float-v1-1-f24140e81376@kernel.org Signed-off-by: Sasha Levin --- arch/powerpc/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9b394bab17eba..374b82cf13d9d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -72,7 +72,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) # Enable CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) -- GitLab From dcd51ab42b7a0431575689c5f74b8b6efd45fc2f Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 28 Feb 2024 19:38:39 +0300 Subject: [PATCH 1263/2290] mac802154: fix llsec key resources release in mac802154_llsec_key_del [ Upstream commit e8a1e58345cf40b7b272e08ac7b32328b2543e40 ] mac802154_llsec_key_del() can free resources of a key directly without following the RCU rules for waiting before the end of a grace period. This may lead to use-after-free in case llsec_lookup_key() is traversing the list of keys in parallel with a key deletion: refcount_t: addition on 0; use-after-free. WARNING: CPU: 4 PID: 16000 at lib/refcount.c:25 refcount_warn_saturate+0x162/0x2a0 Modules linked in: CPU: 4 PID: 16000 Comm: wpan-ping Not tainted 6.7.0 #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:refcount_warn_saturate+0x162/0x2a0 Call Trace: llsec_lookup_key.isra.0+0x890/0x9e0 mac802154_llsec_encrypt+0x30c/0x9c0 ieee802154_subif_start_xmit+0x24/0x1e0 dev_hard_start_xmit+0x13e/0x690 sch_direct_xmit+0x2ae/0xbc0 __dev_queue_xmit+0x11dd/0x3c20 dgram_sendmsg+0x90b/0xd60 __sys_sendto+0x466/0x4c0 __x64_sys_sendto+0xe0/0x1c0 do_syscall_64+0x45/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Also, ieee802154_llsec_key_entry structures are not freed by mac802154_llsec_key_del(): unreferenced object 0xffff8880613b6980 (size 64): comm "iwpan", pid 2176, jiffies 4294761134 (age 60.475s) hex dump (first 32 bytes): 78 0d 8f 18 80 88 ff ff 22 01 00 00 00 00 ad de x......."....... 00 00 00 00 00 00 00 00 03 00 cd ab 00 00 00 00 ................ backtrace: [] __kmem_cache_alloc_node+0x1e2/0x2d0 [] kmalloc_trace+0x25/0xc0 [] mac802154_llsec_key_add+0xac9/0xcf0 [] ieee802154_add_llsec_key+0x5a/0x80 [] nl802154_add_llsec_key+0x426/0x5b0 [] genl_family_rcv_msg_doit+0x1fe/0x2f0 [] genl_rcv_msg+0x531/0x7d0 [] netlink_rcv_skb+0x169/0x440 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x53c/0x820 [] netlink_sendmsg+0x93b/0xe60 [] ____sys_sendmsg+0xac5/0xca0 [] ___sys_sendmsg+0x11d/0x1c0 [] __sys_sendmsg+0xfa/0x1d0 [] do_syscall_64+0x45/0xf0 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 Handle the proper resource release in the RCU callback function mac802154_llsec_key_del_rcu(). Note that if llsec_lookup_key() finds a key, it gets a refcount via llsec_key_get() and locally copies key id from key_entry (which is a list element). So it's safe to call llsec_key_put() and free the list entry after the RCU grace period elapses. Found by Linux Verification Center (linuxtesting.org). Fixes: 5d637d5aabd8 ("mac802154: add llsec structures and mutators") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Alexander Aring Message-ID: <20240228163840.6667-1-pchelkin@ispras.ru> Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- include/net/cfg802154.h | 1 + net/mac802154/llsec.c | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index d8d8719315fd8..5f7f28c9edcb6 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -267,6 +267,7 @@ struct ieee802154_llsec_key { struct ieee802154_llsec_key_entry { struct list_head list; + struct rcu_head rcu; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 55550ead2ced8..a4cc9d077c59c 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -265,19 +265,27 @@ fail: return -ENOMEM; } +static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu) +{ + struct ieee802154_llsec_key_entry *pos; + struct mac802154_llsec_key *mkey; + + pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu); + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + llsec_key_put(mkey); + kfree_sensitive(pos); +} + int mac802154_llsec_key_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_key_entry *pos; list_for_each_entry(pos, &sec->table.keys, list) { - struct mac802154_llsec_key *mkey; - - mkey = container_of(pos->key, struct mac802154_llsec_key, key); - if (llsec_key_id_equal(&pos->id, key)) { list_del_rcu(&pos->list); - llsec_key_put(mkey); + call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu); return 0; } } -- GitLab From 509546fc034747b91edb75697e8f6d2089bdd270 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 29 May 2023 14:13:55 +0800 Subject: [PATCH 1264/2290] swap: comments get_swap_device() with usage rule [ Upstream commit a95722a047724ef75567381976a36f0e44230bd9 ] The general rule to use a swap entry is as follows. When we get a swap entry, if there aren't some other ways to prevent swapoff, such as the folio in swap cache is locked, page table lock is held, etc., the swap entry may become invalid because of swapoff. Then, we need to enclose all swap related functions with get_swap_device() and put_swap_device(), unless the swap functions call get/put_swap_device() by themselves. Add the rule as comments of get_swap_device(). Link: https://lkml.kernel.org/r/20230529061355.125791-6-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: David Hildenbrand Reviewed-by: Yosry Ahmed Reviewed-by: Chris Li (Google) Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Michal Hocko Cc: Minchan Kim Cc: Tim Chen Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton Stable-dep-of: 82b1c07a0af6 ("mm: swap: fix race between free_swap_and_cache() and swapoff()") Signed-off-by: Sasha Levin --- mm/swapfile.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index cca9fda9d036f..324844f98d67c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1222,6 +1222,13 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } /* + * When we get a swap entry, if there aren't some other ways to + * prevent swapoff, such as the folio in swap cache is locked, page + * table lock is held, etc., the swap entry may become invalid because + * of swapoff. Then, we need to enclose all swap related functions + * with get_swap_device() and put_swap_device(), unless the swap + * functions call get/put_swap_device() by themselves. + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1230,9 +1237,8 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way - * to prevent swapoff, such as page lock, page table lock, etc. The - * caller must be prepared for that. For example, the following - * situation is possible. + * to prevent swapoff. The caller must be prepared for that. For + * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() -- GitLab From 1ede7f1d7eed1738d1b9333fd1e152ccb450b86a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 6 Mar 2024 14:03:56 +0000 Subject: [PATCH 1265/2290] mm: swap: fix race between free_swap_and_cache() and swapoff() [ Upstream commit 82b1c07a0af603e3c47b906c8e991dc96f01688e ] There was previously a theoretical window where swapoff() could run and teardown a swap_info_struct while a call to free_swap_and_cache() was running in another thread. This could cause, amongst other bad possibilities, swap_page_trans_huge_swapped() (called by free_swap_and_cache()) to access the freed memory for swap_map. This is a theoretical problem and I haven't been able to provoke it from a test case. But there has been agreement based on code review that this is possible (see link below). Fix it by using get_swap_device()/put_swap_device(), which will stall swapoff(). There was an extra check in _swap_info_get() to confirm that the swap entry was not free. This isn't present in get_swap_device() because it doesn't make sense in general due to the race between getting the reference and swapoff. So I've added an equivalent check directly in free_swap_and_cache(). Details of how to provoke one possible issue (thanks to David Hildenbrand for deriving this): --8<----- __swap_entry_free() might be the last user and result in "count == SWAP_HAS_CACHE". swapoff->try_to_unuse() will stop as soon as soon as si->inuse_pages==0. So the question is: could someone reclaim the folio and turn si->inuse_pages==0, before we completed swap_page_trans_huge_swapped(). Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are still references by swap entries. Process 1 still references subpage 0 via swap entry. Process 2 still references subpage 1 via swap entry. Process 1 quits. Calls free_swap_and_cache(). -> count == SWAP_HAS_CACHE [then, preempted in the hypervisor etc.] Process 2 quits. Calls free_swap_and_cache(). -> count == SWAP_HAS_CACHE Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls __try_to_reclaim_swap(). __try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()-> put_swap_folio()->free_swap_slot()->swapcache_free_entries()-> swap_entry_free()->swap_range_free()-> ... WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries); What stops swapoff to succeed after process 2 reclaimed the swap cache but before process1 finished its call to swap_page_trans_huge_swapped()? --8<----- Link: https://lkml.kernel.org/r/20240306140356.3974886-1-ryan.roberts@arm.com Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch") Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redhat.com/ Signed-off-by: Ryan Roberts Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/swapfile.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 324844f98d67c..0d6182db44a6a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1229,6 +1229,11 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * with get_swap_device() and put_swap_device(), unless the swap * functions call get/put_swap_device() by themselves. * + * Note that when only holding the PTL, swapoff might succeed immediately + * after freeing a swap entry. Therefore, immediately after + * __swap_entry_free(), the swap info might become stale and should not + * be touched without a prior get_swap_device(). + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1630,13 +1635,19 @@ int free_swap_and_cache(swp_entry_t entry) if (non_swap_entry(entry)) return 1; - p = _swap_info_get(entry); + p = get_swap_device(entry); if (p) { + if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) { + put_swap_device(p); + return 0; + } + count = __swap_entry_free(p, entry); if (count == SWAP_HAS_CACHE && !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); + put_swap_device(p); } return p != NULL; } -- GitLab From ad8a4eb663521323f1a57a72c4a63ffabe24e1ac Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 6 Mar 2024 10:44:38 +0900 Subject: [PATCH 1266/2290] mmc: core: Fix switch on gp3 partition [ Upstream commit 4af59a8df5ea930038cd3355e822f5eedf4accc1 ] Commit e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB partitions.") added a mask check for 'part_type', but the mask used was wrong leading to the code intended for rpmb also being executed for GP3. On some MMCs (but not all) this would make gp3 partition inaccessible: armadillo:~# head -c 1 < /dev/mmcblk2gp3 head: standard input: I/O error armadillo:~# dmesg -c [ 422.976583] mmc2: running CQE recovery [ 423.058182] mmc2: running CQE recovery [ 423.137607] mmc2: running CQE recovery [ 423.137802] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 4 prio class 0 [ 423.237125] mmc2: running CQE recovery [ 423.318206] mmc2: running CQE recovery [ 423.397680] mmc2: running CQE recovery [ 423.397837] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0 [ 423.408287] Buffer I/O error on dev mmcblk2gp3, logical block 0, async page read the part_type values of interest here are defined as follow: main 0 boot0 1 boot1 2 rpmb 3 gp0 4 gp1 5 gp2 6 gp3 7 so mask with EXT_CSD_PART_CONFIG_ACC_MASK (7) to correctly identify rpmb Fixes: e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB partitions.") Cc: stable@vger.kernel.org Cc: Jorge Ramirez-Ortiz Signed-off-by: Dominique Martinet Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240306-mmc-partswitch-v1-1-bf116985d950@codewreck.org Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/block.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ea60efaecb0dd..4688a658d6a6d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -889,10 +889,11 @@ static const struct block_device_operations mmc_bdops = { static int mmc_blk_part_switch_pre(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { if (card->ext_csd.cmdq_en) { ret = mmc_cmdq_disable(card); if (ret) @@ -907,10 +908,11 @@ static int mmc_blk_part_switch_pre(struct mmc_card *card, static int mmc_blk_part_switch_post(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { mmc_retune_unpause(card->host); if (card->reenable_cmdq && !card->ext_csd.cmdq_en) ret = mmc_cmdq_enable(card); -- GitLab From 03f58a64794aaee56df2de37e53c3610dd151f76 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Fri, 1 Mar 2024 14:28:11 +0100 Subject: [PATCH 1267/2290] drm/etnaviv: Restore some id values [ Upstream commit b735ee173f84d5d0d0733c53946a83c12d770d05 ] The hwdb selection logic as a feature that allows it to mark some fields as 'don't care'. If we match with such a field we memcpy(..) the current etnaviv_chip_identity into ident. This step can overwrite some id values read from the GPU with the 'don't care' value. Fix this issue by restoring the affected values after the memcpy(..). As this is crucial for user space to know when this feature works as expected increment the minor version too. Fixes: 4078a1186dd3 ("drm/etnaviv: update hwdb selection logic") Cc: stable@vger.kernel.org Signed-off-by: Christian Gmeiner Reviewed-by: Tomeu Vizoso Signed-off-by: Lucas Stach Signed-off-by: Sasha Levin --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 1d2b4fb4bcf8b..f29952a55c05d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -488,7 +488,7 @@ static const struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 3, + .minor = 4, }; /* diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index f2fc645c79569..212e7050c4ba6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -135,6 +135,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) { struct etnaviv_chip_identity *ident = &gpu->identity; + const u32 product_id = ident->product_id; + const u32 customer_id = ident->customer_id; + const u32 eco_id = ident->eco_id; int i; for (i = 0; i < ARRAY_SIZE(etnaviv_chip_identities); i++) { @@ -148,6 +151,12 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) etnaviv_chip_identities[i].eco_id == ~0U)) { memcpy(ident, &etnaviv_chip_identities[i], sizeof(*ident)); + + /* Restore some id values as ~0U aka 'don't care' might been used. */ + ident->product_id = product_id; + ident->customer_id = customer_id; + ident->eco_id = eco_id; + return true; } } -- GitLab From acda20add47677df58b779550d2b63a89133b7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 27 Feb 2024 12:05:50 +0100 Subject: [PATCH 1268/2290] landlock: Warn once if a Landlock action is requested while disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 782191c74875cc33b50263e21d76080b1411884d ] Because sandboxing can be used as an opportunistic security measure, user space may not log unsupported features. Let the system administrator know if an application tries to use Landlock but failed because it isn't enabled at boot time. This may be caused by boot loader configurations with outdated "lsm" kernel's command-line parameter. Cc: stable@vger.kernel.org Fixes: 265885daf3e5 ("landlock: Add syscall implementations") Reviewed-by: Kees Cook Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20240227110550.3702236-2-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Sasha Levin --- security/landlock/syscalls.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 2ca0ccbd905ae..d0cb3d0cbf985 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -32,6 +32,18 @@ #include "ruleset.h" #include "setup.h" +static bool is_initialized(void) +{ + if (likely(landlock_initialized)) + return true; + + pr_warn_once( + "Disabled but requested by user space. " + "You should enable Landlock at boot time: " + "https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n"); + return false; +} + /** * copy_min_struct_from_user - Safe future-proof argument copying * @@ -165,7 +177,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, /* Build-time checks. */ build_check_abi(); - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; if (flags) { @@ -311,7 +323,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, struct landlock_ruleset *ruleset; int res, err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* No flag for now. */ @@ -402,7 +414,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, struct landlock_cred_security *new_llcred; int err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* -- GitLab From b04abf51d72505f6a102d53788200e8a38afc29d Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Thu, 7 Mar 2024 12:06:58 +0100 Subject: [PATCH 1269/2290] hwmon: (amc6821) add of_match table [ Upstream commit 3f003fda98a7a8d5f399057d92e6ed56b468657c ] Add of_match table for "ti,amc6821" compatible string. This fixes automatic driver loading by userspace when using device-tree, and if built as a module like major linux distributions do. While devices probe just fine with i2c_device_id table, userspace can't match the "ti,amc6821" compatible string from dt with the plain "amc6821" device id. As a result, the kernel module can not be loaded. Cc: stable@vger.kernel.org Signed-off-by: Josua Mayer Link: https://lore.kernel.org/r/20240307-amc6821-of-match-v1-1-5f40464a3110@solid-run.com [groeck: Cleaned up patch description] Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/amc6821.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 3bfd12ff4b3ca..6868db4ac84f3 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -934,10 +934,21 @@ static const struct i2c_device_id amc6821_id[] = { MODULE_DEVICE_TABLE(i2c, amc6821_id); +static const struct of_device_id __maybe_unused amc6821_of_match[] = { + { + .compatible = "ti,amc6821", + .data = (void *)amc6821, + }, + { } +}; + +MODULE_DEVICE_TABLE(of, amc6821_of_match); + static struct i2c_driver amc6821_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "amc6821", + .of_match_table = of_match_ptr(amc6821_of_match), }, .probe_new = amc6821_probe, .id_table = amc6821_id, -- GitLab From fb1088d51bbaa0faec5a55d4f5818a9ab79e24df Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Thu, 15 Feb 2024 15:50:09 +0000 Subject: [PATCH 1270/2290] ext4: fix corruption during on-line resize [ Upstream commit a6b3bfe176e8a5b05ec4447404e412c2a3fc92cc ] We observed a corruption during on-line resize of a file system that is larger than 16 TiB with 4k block size. With having more then 2^32 blocks resize_inode is turned off by default by mke2fs. The issue can be reproduced on a smaller file system for convenience by explicitly turning off resize_inode. An on-line resize across an 8 GiB boundary (the size of a meta block group in this setup) then leads to a corruption: dev=/dev/ # should be >= 16 GiB mkdir -p /corruption /sbin/mke2fs -t ext4 -b 4096 -O ^resize_inode $dev $((2 * 2**21 - 2**15)) mount -t ext4 $dev /corruption dd if=/dev/zero bs=4096 of=/corruption/test count=$((2*2**21 - 4*2**15)) sha1sum /corruption/test # 79d2658b39dcfd77274e435b0934028adafaab11 /corruption/test /sbin/resize2fs $dev $((2*2**21)) # drop page cache to force reload the block from disk echo 1 > /proc/sys/vm/drop_caches sha1sum /corruption/test # 3c2abc63cbf1a94c9e6977e0fbd72cd832c4d5c3 /corruption/test 2^21 = 2^15*2^6 equals 8 GiB whereof 2^15 is the number of blocks per block group and 2^6 are the number of block groups that make a meta block group. The last checksum might be different depending on how the file is laid out across the physical blocks. The actual corruption occurs at physical block 63*2^15 = 2064384 which would be the location of the backup of the meta block group's block descriptor. During the on-line resize the file system will be converted to meta_bg starting at s_first_meta_bg which is 2 in the example - meaning all block groups after 16 GiB. However, in ext4_flex_group_add we might add block groups that are not part of the first meta block group yet. In the reproducer we achieved this by substracting the size of a whole block group from the point where the meta block group would start. This must be considered when updating the backup block group descriptors to follow the non-meta_bg layout. The fix is to add a test whether the group to add is already part of the meta block group or not. Fixes: 01f795f9e0d67 ("ext4: add online resizing support for meta_bg and 64-bit file systems") Cc: Signed-off-by: Maximilian Heyne Tested-by: Srivathsa Dara Reviewed-by: Srivathsa Dara Link: https://lore.kernel.org/r/20240215155009.94493-1-mheyne@amazon.de Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f2ed15af703a8..38ce42396758d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1606,7 +1606,8 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = ext4_has_feature_meta_bg(sb); + int meta_bg = ext4_has_feature_meta_bg(sb) && + gdb_num >= le32_to_cpu(es->s_first_meta_bg); sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - ext4_group_first_block_no(sb, 0); sector_t old_gdb = 0; -- GitLab From 08eceec7931eb03956fec7910bc798c2cbc4586b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Sat, 24 Feb 2024 11:40:23 +0000 Subject: [PATCH 1271/2290] nvmem: meson-efuse: fix function pointer type mismatch [ Upstream commit cbd38332c140829ab752ba4e727f98be5c257f18 ] clang-16 warns about casting functions to incompatible types, as is done here to call clk_disable_unprepare: drivers/nvmem/meson-efuse.c:78:12: error: cast from 'void (*)(struct clk *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 78 | (void(*)(void *))clk_disable_unprepare, The pattern of getting, enabling and setting a disable callback for a clock can be replaced with devm_clk_get_enabled(), which also fixes this warning. Fixes: 611fbca1c861 ("nvmem: meson-efuse: add peripheral clock") Cc: Stable@vger.kernel.org Reported-by: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Martin Blumenstingl Acked-by: Arnd Bergmann Reviewed-by: Justin Stitt Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240224114023.85535-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/nvmem/meson-efuse.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c index d6b533497ce1a..ba2714bef8d0e 100644 --- a/drivers/nvmem/meson-efuse.c +++ b/drivers/nvmem/meson-efuse.c @@ -47,7 +47,6 @@ static int meson_efuse_probe(struct platform_device *pdev) struct nvmem_config *econfig; struct clk *clk; unsigned int size; - int ret; sm_np = of_parse_phandle(pdev->dev.of_node, "secure-monitor", 0); if (!sm_np) { @@ -60,27 +59,9 @@ static int meson_efuse_probe(struct platform_device *pdev) if (!fw) return -EPROBE_DEFER; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get efuse gate"); - return ret; - } - - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "failed to enable gate"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - clk); - if (ret) { - dev_err(dev, "failed to add disable callback"); - return ret; - } + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to get efuse gate"); if (meson_sm_call(fw, SM_EFUSE_USER_MAX, &size, 0, 0, 0, 0, 0) < 0) { dev_err(dev, "failed to get max user"); -- GitLab From b4139fe933df78215b8d60b8c4f703bd7553edc2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 24 Feb 2024 11:41:37 +0000 Subject: [PATCH 1272/2290] slimbus: core: Remove usage of the deprecated ida_simple_xx() API [ Upstream commit 89ffa4cccec54467446f141a79b9e36893079fb8 ] ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_range() is inclusive. So change this change allows one more device. Previously address 0xFE was never used. Fixes: 46a2bb5a7f7e ("slimbus: core: Add slim controllers support") Cc: Stable@vger.kernel.org Signed-off-by: Christophe JAILLET Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240224114137.85781-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/slimbus/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c index 219483b79c09c..37fd655994ef3 100644 --- a/drivers/slimbus/core.c +++ b/drivers/slimbus/core.c @@ -436,8 +436,8 @@ static int slim_device_alloc_laddr(struct slim_device *sbdev, if (ret < 0) goto err; } else if (report_present) { - ret = ida_simple_get(&ctrl->laddr_ida, - 0, SLIM_LA_MANAGER - 1, GFP_KERNEL); + ret = ida_alloc_max(&ctrl->laddr_ida, + SLIM_LA_MANAGER - 1, GFP_KERNEL); if (ret < 0) goto err; -- GitLab From 9cb3ace502385ef33581e3f21c749b768dec5070 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Thu, 7 Mar 2024 11:03:27 +0800 Subject: [PATCH 1273/2290] phy: tegra: xusb: Add API to retrieve the port number of phy [ Upstream commit d843f031d9e90462253015bc0bd9e3852d206bf2 ] This patch introduces a new API, tegra_xusb_padctl_get_port_number, to the Tegra XUSB Pad Controller driver. This API is used to identify the USB port that is associated with a given PHY. The function takes a PHY pointer for either a USB2 PHY or USB3 PHY as input and returns the corresponding port number. If the PHY pointer is invalid, it returns -ENODEV. Cc: stable@vger.kernel.org Signed-off-by: Wayne Chang Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20240307030328.1487748-2-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/phy/tegra/xusb.c | 13 +++++++++++++ include/linux/phy/tegra/xusb.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 4d5b4071d47d5..dc22b1dd2c8ba 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -1518,6 +1518,19 @@ int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, } EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_usb3_companion); +int tegra_xusb_padctl_get_port_number(struct phy *phy) +{ + struct tegra_xusb_lane *lane; + + if (!phy) + return -ENODEV; + + lane = phy_get_drvdata(phy); + + return lane->index; +} +EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_port_number); + MODULE_AUTHOR("Thierry Reding "); MODULE_DESCRIPTION("Tegra XUSB Pad Controller driver"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 70998e6dd6fdc..6ca51e0080ec0 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); -- GitLab From bf1eef7fc0f32d476e32166a2e06a45e172eb1c7 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Thu, 7 Mar 2024 11:03:28 +0800 Subject: [PATCH 1274/2290] usb: gadget: tegra-xudc: Fix USB3 PHY retrieval logic [ Upstream commit 84fa943d93c31ee978355e6c6c69592dae3c9f59 ] This commit resolves an issue in the tegra-xudc USB gadget driver that incorrectly fetched USB3 PHY instances. The problem stemmed from the assumption of a one-to-one correspondence between USB2 and USB3 PHY names and their association with physical USB ports in the device tree. Previously, the driver associated USB3 PHY names directly with the USB3 instance number, leading to mismatches when mapping the physical USB ports. For instance, if using USB3-1 PHY, the driver expect the corresponding PHY name as 'usb3-1'. However, the physical USB ports in the device tree were designated as USB2-0 and USB3-0 as we only have one device controller, causing a misalignment. This commit rectifies the issue by adjusting the PHY naming logic. Now, the driver correctly correlates the USB2 and USB3 PHY instances, allowing the USB2-0 and USB3-1 PHYs to form a physical USB port pair while accurately reflecting their configuration in the device tree by naming them USB2-0 and USB3-0, respectively. The change ensures that the PHY and PHY names align appropriately, resolving the mismatch between physical USB ports and their associated names in the device tree. Fixes: b4e19931c98a ("usb: gadget: tegra-xudc: Support multiple device modes") Cc: stable@vger.kernel.org Signed-off-by: Wayne Chang Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20240307030328.1487748-3-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/tegra-xudc.c | 39 ++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index a8cadc45c65aa..fd7a9535973ed 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3486,8 +3486,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc) static int tegra_xudc_phy_get(struct tegra_xudc *xudc) { - int err = 0, usb3; - unsigned int i; + int err = 0, usb3_companion_port; + unsigned int i, j; xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys, sizeof(*xudc->utmi_phy), GFP_KERNEL); @@ -3515,7 +3515,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) if (IS_ERR(xudc->utmi_phy[i])) { err = PTR_ERR(xudc->utmi_phy[i]); dev_err_probe(xudc->dev, err, - "failed to get usb2-%d PHY\n", i); + "failed to get PHY for phy-name usb2-%d\n", i); goto clean_up; } else if (xudc->utmi_phy[i]) { /* Get usb-phy, if utmi phy is available */ @@ -3534,19 +3534,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) } /* Get USB3 phy */ - usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); - if (usb3 < 0) + usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); + if (usb3_companion_port < 0) continue; - snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3); - xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); - if (IS_ERR(xudc->usb3_phy[i])) { - err = PTR_ERR(xudc->usb3_phy[i]); - dev_err_probe(xudc->dev, err, - "failed to get usb3-%d PHY\n", usb3); - goto clean_up; - } else if (xudc->usb3_phy[i]) - dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3); + for (j = 0; j < xudc->soc->num_phys; j++) { + snprintf(phy_name, sizeof(phy_name), "usb3-%d", j); + xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); + if (IS_ERR(xudc->usb3_phy[i])) { + err = PTR_ERR(xudc->usb3_phy[i]); + dev_err_probe(xudc->dev, err, + "failed to get PHY for phy-name usb3-%d\n", j); + goto clean_up; + } else if (xudc->usb3_phy[i]) { + int usb2_port = + tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]); + int usb3_port = + tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]); + if (usb3_port == usb3_companion_port) { + dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n", + usb2_port, usb3_port, i); + break; + } + } + } } return err; -- GitLab From 768625f84b2a6479d4e8f0bd599c53b1d5a35347 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 4 Feb 2024 16:57:36 +0100 Subject: [PATCH 1275/2290] speakup: Fix 8bit characters from direct synth [ Upstream commit b6c8dafc9d86eb77e502bb018ec4105e8d2fbf78 ] When userland echoes 8bit characters to /dev/synth with e.g. echo -e '\xe9' > /dev/synth synth_write would get characters beyond 0x7f, and thus negative when char is signed. When given to synth_buffer_add which takes a u16, this would sign-extend and produce a U+ffxy character rather than U+xy. Users thus get garbled text instead of accents in their output. Let's fix this by making sure that we read unsigned characters. Signed-off-by: Samuel Thibault Fixes: 89fc2ae80bb1 ("speakup: extend synth buffer to 16bit unicode characters") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240204155736.2oh4ot7tiaa2wpbh@begin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/accessibility/speakup/synth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c index eea2a2fa4f015..45f9061031338 100644 --- a/drivers/accessibility/speakup/synth.c +++ b/drivers/accessibility/speakup/synth.c @@ -208,8 +208,10 @@ void spk_do_flush(void) wake_up_process(speakup_task); } -void synth_write(const char *buf, size_t count) +void synth_write(const char *_buf, size_t count) { + const unsigned char *buf = (const unsigned char *) _buf; + while (count--) synth_buffer_add(*buf++); synth_start(); -- GitLab From 3a342fa31840894b28d23dc29f0eaee2eb6862e0 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 12 Feb 2024 13:01:35 +0100 Subject: [PATCH 1276/2290] PCI/AER: Block runtime suspend when handling errors [ Upstream commit 002bf2fbc00e5c4b95fb167287e2ae7d1973281e ] PM runtime can be done simultaneously with AER error handling. Avoid that by using pm_runtime_get_sync() before and pm_runtime_put() after reset in pcie_do_recovery() for all recovering devices. pm_runtime_get_sync() will increase dev->power.usage_count counter to prevent any possible future request to runtime suspend a device. It will also resume a device, if it was previously in D3hot state. I tested with igc device by doing simultaneous aer_inject and rpm suspend/resume via /sys/bus/pci/devices/PCI_ID/power/control and can reproduce: igc 0000:02:00.0: not ready 65535ms after bus reset; giving up pcieport 0000:00:1c.2: AER: Root Port link has been reset (-25) pcieport 0000:00:1c.2: AER: subordinate device reset failed pcieport 0000:00:1c.2: AER: device recovery failed igc 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible The problem disappears when this patch is applied. Link: https://lore.kernel.org/r/20240212120135.146068-1-stanislaw.gruszka@linux.intel.com Signed-off-by: Stanislaw Gruszka Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Rafael J. Wysocki Cc: Signed-off-by: Sasha Levin --- drivers/pci/pcie/err.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 59c90d04a609a..705893b5f7b09 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -13,6 +13,7 @@ #define dev_fmt(fmt) "AER: " fmt #include +#include #include #include #include @@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev, return 0; } +static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data) +{ + pm_runtime_get_sync(&pdev->dev); + return 0; +} + +static int pci_pm_runtime_put(struct pci_dev *pdev, void *data) +{ + pm_runtime_put(&pdev->dev); + return 0; +} + static int report_frozen_detected(struct pci_dev *dev, void *data) { return report_error_detected(dev, pci_channel_io_frozen, data); @@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); + pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL); + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); @@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); } + + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_info(bridge, "device recovery successful\n"); return status; failed: + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ -- GitLab From 220778af6c7285dbe7a989141074433ea66b3f98 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Mar 2024 17:48:03 -0700 Subject: [PATCH 1277/2290] io_uring/net: correctly handle multishot recvmsg retry setup [ Upstream commit deaef31bc1ec7966698a427da8c161930830e1cf ] If we loop for multishot receive on the initial attempt, and then abort later on to wait for more, we miss a case where we should be copying the io_async_msghdr from the stack to stable storage. This leads to the next retry potentially failing, if the application had the msghdr on the stack. Cc: stable@vger.kernel.org Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 0d4ee3d738fbf..b1b564c04d1e7 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -876,7 +876,8 @@ retry_multishot: kfree(kmsg->free_iov); io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; - } + } else if (ret == -EAGAIN) + return io_setup_async_msg(req, kmsg, issue_flags); return ret; } -- GitLab From bda2265e6d1c44fc974498bd217fd8a2bddf2d9c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2023 14:45:20 -0600 Subject: [PATCH 1278/2290] sparc: Explicitly include correct DT includes [ Upstream commit 263291fa44ff0909b5b7c43ff40babc1c43362f2 ] The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it was merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Acked-by: Sam Ravnborg Link: https://lore.kernel.org/all/20230718143211.1066810-1-robh@kernel.org/ Signed-off-by: Rob Herring Stable-dep-of: 91d3ff922c34 ("sparc32: Fix parport build with sparc32") Signed-off-by: Sasha Levin --- arch/sparc/crypto/crop_devid.c | 2 +- arch/sparc/include/asm/floppy_32.h | 2 +- arch/sparc/include/asm/floppy_64.h | 2 +- arch/sparc/include/asm/parport.h | 3 ++- arch/sparc/kernel/apc.c | 2 +- arch/sparc/kernel/auxio_32.c | 1 - arch/sparc/kernel/auxio_64.c | 3 ++- arch/sparc/kernel/central.c | 2 +- arch/sparc/kernel/chmc.c | 3 ++- arch/sparc/kernel/ioport.c | 2 +- arch/sparc/kernel/leon_kernel.c | 2 -- arch/sparc/kernel/leon_pci.c | 3 ++- arch/sparc/kernel/leon_pci_grpci1.c | 3 ++- arch/sparc/kernel/leon_pci_grpci2.c | 4 +++- arch/sparc/kernel/of_device_32.c | 2 +- arch/sparc/kernel/of_device_64.c | 4 ++-- arch/sparc/kernel/of_device_common.c | 4 ++-- arch/sparc/kernel/pci.c | 3 ++- arch/sparc/kernel/pci_common.c | 3 ++- arch/sparc/kernel/pci_fire.c | 3 ++- arch/sparc/kernel/pci_impl.h | 1 - arch/sparc/kernel/pci_msi.c | 2 ++ arch/sparc/kernel/pci_psycho.c | 4 +++- arch/sparc/kernel/pci_sun4v.c | 3 ++- arch/sparc/kernel/pmc.c | 2 +- arch/sparc/kernel/power.c | 3 ++- arch/sparc/kernel/prom_irqtrans.c | 1 + arch/sparc/kernel/psycho_common.c | 1 + arch/sparc/kernel/sbus.c | 3 ++- arch/sparc/kernel/time_32.c | 1 - arch/sparc/mm/io-unit.c | 3 ++- arch/sparc/mm/iommu.c | 5 +++-- 32 files changed, 49 insertions(+), 33 deletions(-) diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c index 83fc4536dcd57..93f4e0fdd38c1 100644 --- a/arch/sparc/crypto/crop_devid.c +++ b/arch/sparc/crypto/crop_devid.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include -#include /* This is a dummy device table linked into all of the crypto * opcode drivers. It serves to trigger the module autoloading diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index e10ab9ad3097d..836f6575aa1d7 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -8,7 +8,7 @@ #define __ASM_SPARC_FLOPPY_H #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 070c8c1f5c8fd..6efeb24b0a92c 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -11,7 +11,7 @@ #define __ASM_SPARC64_FLOPPY_H #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 03b27090c0c8c..0a7ffcfd59cda 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -7,7 +7,8 @@ #ifndef _ASM_SPARC64_PARPORT_H #define _ASM_SPARC64_PARPORT_H 1 -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index ecd05bc0a1045..d44725d37e30f 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/auxio_32.c b/arch/sparc/kernel/auxio_32.c index a32d588174f2f..989860e890c4f 100644 --- a/arch/sparc/kernel/auxio_32.c +++ b/arch/sparc/kernel/auxio_32.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index 774a82b0c649f..2a2800d213256 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c index 23f8838dd96e3..a1a6485c91831 100644 --- a/arch/sparc/kernel/central.c +++ b/arch/sparc/kernel/central.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index 6ff43df740e08..d5fad5fb04c1d 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 4e4f3d3263e46..e5a327799e574 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 39229940d725d..4c61da491fee1 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include #include diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index e5e5ff6b9a5c5..3a73bc466f95d 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -7,7 +7,8 @@ * Code is partially derived from pcic.c */ -#include +#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index c32590bdd3120..b2b639bee0684 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -13,10 +13,11 @@ * Contributors: Daniel Hellstrom */ -#include #include #include +#include #include +#include #include #include diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index dd06abc61657f..ac2acd62a24ec 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -6,12 +6,14 @@ * */ -#include #include #include #include #include #include +#include +#include + #include #include #include diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 4ebf51e6e78ec..9ac6853b34c1b 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -7,8 +7,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5a9f86b1d4e7e..a8ccd7260fe7f 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include @@ -9,8 +8,9 @@ #include #include #include -#include +#include #include +#include #include #include "of_device_common.h" diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c index e717a56efc5d3..a09724381bd40 100644 --- a/arch/sparc/kernel/of_device_common.c +++ b/arch/sparc/kernel/of_device_common.c @@ -1,15 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-only #include #include -#include #include #include #include #include +#include #include #include -#include #include +#include #include "of_device_common.h" diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index cb1ef25116e94..5637b37ba9114 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -20,8 +20,9 @@ #include #include #include -#include +#include #include +#include #include #include diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index 4759ccd542fe6..5eeec9ad68457 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -8,7 +8,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c index 0ca08d455e805..0b91bde80fdc5 100644 --- a/arch/sparc/kernel/pci_fire.c +++ b/arch/sparc/kernel/pci_fire.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h index 4e3d15189fa95..f31761f517575 100644 --- a/arch/sparc/kernel/pci_impl.h +++ b/arch/sparc/kernel/pci_impl.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 9ed11985768e1..fc7402948b7bc 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c index f413371da3871..1efc98305ec76 100644 --- a/arch/sparc/kernel/pci_psycho.c +++ b/arch/sparc/kernel/pci_psycho.c @@ -13,7 +13,9 @@ #include #include #include -#include +#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 3844809718052..0ddef827e0f99 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index b5c1eb33b9518..69a0206e56f01 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c index d941875dd7186..2f6c909e1755d 100644 --- a/arch/sparc/kernel/power.c +++ b/arch/sparc/kernel/power.c @@ -9,7 +9,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/prom_irqtrans.c b/arch/sparc/kernel/prom_irqtrans.c index 28aff1c524b58..426bd08cb2ab1 100644 --- a/arch/sparc/kernel/prom_irqtrans.c +++ b/arch/sparc/kernel/prom_irqtrans.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c index e90bcb6bad7fc..5ee74b4c0cf40 100644 --- a/arch/sparc/kernel/psycho_common.c +++ b/arch/sparc/kernel/psycho_common.c @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c index 32141e1006c4a..0bababf6f2bcd 100644 --- a/arch/sparc/kernel/sbus.c +++ b/arch/sparc/kernel/sbus.c @@ -14,7 +14,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 8a08830e4a653..79934beba03a6 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index bf3e6d2fe5d94..3afbbe5fba46b 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 9e3f6933ca13f..14e178bfe33ab 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -7,14 +7,15 @@ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ - + #include #include #include #include #include #include -#include +#include +#include #include #include -- GitLab From f3ed58d7b3566d7c74df36dea79719b5aaf0df76 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 24 Feb 2024 18:42:27 +0100 Subject: [PATCH 1279/2290] sparc32: Fix parport build with sparc32 [ Upstream commit 91d3ff922c346d6d8cb8de5ff8d504fe0ca9e17e ] include/asm/parport.h is sparc64 specific. Rename it to parport_64.h and use the generic version for sparc32. This fixed all{mod,yes}config build errors like: parport_pc.c:(.text):undefined-reference-to-ebus_dma_enable parport_pc.c:(.text):undefined-reference-to-ebus_dma_irq_enable parport_pc.c:(.text):undefined-reference-to-ebus_dma_register The errors occur as the sparc32 build references sparc64 symbols. Signed-off-by: Sam Ravnborg Cc: "David S. Miller" Cc: Andreas Larsson Cc: Randy Dunlap Cc: Maciej W. Rozycki Closes: https://lore.kernel.org/r/20230406160548.25721-1-rdunlap@infradead.org/ Fixes: 66bcd06099bb ("parport_pc: Also enable driver for PCI systems") Cc: stable@vger.kernel.org # v5.18+ Tested-by: Randy Dunlap # build-tested Reviewed-by: Andreas Larsson Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240224-sam-fix-sparc32-all-builds-v2-6-1f186603c5c4@ravnborg.org Signed-off-by: Sasha Levin --- arch/sparc/include/asm/parport.h | 259 +--------------------------- arch/sparc/include/asm/parport_64.h | 256 +++++++++++++++++++++++++++ 2 files changed, 263 insertions(+), 252 deletions(-) create mode 100644 arch/sparc/include/asm/parport_64.h diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 0a7ffcfd59cda..e2eed8f97665f 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -1,256 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* parport.h: sparc64 specific parport initialization and dma. - * - * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) - */ +#ifndef ___ASM_SPARC_PARPORT_H +#define ___ASM_SPARC_PARPORT_H -#ifndef _ASM_SPARC64_PARPORT_H -#define _ASM_SPARC64_PARPORT_H 1 - -#include -#include - -#include -#include -#include - -#define PARPORT_PC_MAX_PORTS PARPORT_MAX - -/* - * While sparc64 doesn't have an ISA DMA API, we provide something that looks - * close enough to make parport_pc happy - */ -#define HAS_DMA - -#ifdef CONFIG_PARPORT_PC_FIFO -static DEFINE_SPINLOCK(dma_spin_lock); - -#define claim_dma_lock() \ -({ unsigned long flags; \ - spin_lock_irqsave(&dma_spin_lock, flags); \ - flags; \ -}) - -#define release_dma_lock(__flags) \ - spin_unlock_irqrestore(&dma_spin_lock, __flags); +#if defined(__sparc__) && defined(__arch64__) +#include +#else +#include +#endif #endif -static struct sparc_ebus_info { - struct ebus_dma_info info; - unsigned int addr; - unsigned int count; - int lock; - - struct parport *port; -} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS]; - -static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS); - -static inline int request_dma(unsigned int dmanr, const char *device_id) -{ - if (dmanr >= PARPORT_PC_MAX_PORTS) - return -EINVAL; - if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0) - return -EBUSY; - return 0; -} - -static inline void free_dma(unsigned int dmanr) -{ - if (dmanr >= PARPORT_PC_MAX_PORTS) { - printk(KERN_WARNING "Trying to free DMA%d\n", dmanr); - return; - } - if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) { - printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr); - return; - } -} - -static inline void enable_dma(unsigned int dmanr) -{ - ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1); - - if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info, - sparc_ebus_dmas[dmanr].addr, - sparc_ebus_dmas[dmanr].count)) - BUG(); -} - -static inline void disable_dma(unsigned int dmanr) -{ - ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0); -} - -static inline void clear_dma_ff(unsigned int dmanr) -{ - /* nothing */ -} - -static inline void set_dma_mode(unsigned int dmanr, char mode) -{ - ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE)); -} - -static inline void set_dma_addr(unsigned int dmanr, unsigned int addr) -{ - sparc_ebus_dmas[dmanr].addr = addr; -} - -static inline void set_dma_count(unsigned int dmanr, unsigned int count) -{ - sparc_ebus_dmas[dmanr].count = count; -} - -static inline unsigned int get_dma_residue(unsigned int dmanr) -{ - return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info); -} - -static int ecpp_probe(struct platform_device *op) -{ - unsigned long base = op->resource[0].start; - unsigned long config = op->resource[1].start; - unsigned long d_base = op->resource[2].start; - unsigned long d_len; - struct device_node *parent; - struct parport *p; - int slot, err; - - parent = op->dev.of_node->parent; - if (of_node_name_eq(parent, "dma")) { - p = parport_pc_probe_port(base, base + 0x400, - op->archdata.irqs[0], PARPORT_DMA_NOFIFO, - op->dev.parent->parent, 0); - if (!p) - return -ENOMEM; - dev_set_drvdata(&op->dev, p); - return 0; - } - - for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) { - if (!test_and_set_bit(slot, dma_slot_map)) - break; - } - err = -ENODEV; - if (slot >= PARPORT_PC_MAX_PORTS) - goto out_err; - - spin_lock_init(&sparc_ebus_dmas[slot].info.lock); - - d_len = (op->resource[2].end - d_base) + 1UL; - sparc_ebus_dmas[slot].info.regs = - of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA"); - - if (!sparc_ebus_dmas[slot].info.regs) - goto out_clear_map; - - sparc_ebus_dmas[slot].info.flags = 0; - sparc_ebus_dmas[slot].info.callback = NULL; - sparc_ebus_dmas[slot].info.client_cookie = NULL; - sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; - strcpy(sparc_ebus_dmas[slot].info.name, "parport"); - if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) - goto out_unmap_regs; - - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1); - - /* Configure IRQ to Push Pull, Level Low */ - /* Enable ECP, set bit 2 of the CTR first */ - outb(0x04, base + 0x02); - ns87303_modify(config, PCR, - PCR_EPP_ENABLE | - PCR_IRQ_ODRAIN, - PCR_ECP_ENABLE | - PCR_ECP_CLK_ENA | - PCR_IRQ_POLAR); - - /* CTR bit 5 controls direction of port */ - ns87303_modify(config, PTR, - 0, PTR_LPT_REG_DIR); - - p = parport_pc_probe_port(base, base + 0x400, - op->archdata.irqs[0], - slot, - op->dev.parent, - 0); - err = -ENOMEM; - if (!p) - goto out_disable_irq; - - dev_set_drvdata(&op->dev, p); - - return 0; - -out_disable_irq: - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); - ebus_dma_unregister(&sparc_ebus_dmas[slot].info); - -out_unmap_regs: - of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len); - -out_clear_map: - clear_bit(slot, dma_slot_map); - -out_err: - return err; -} - -static int ecpp_remove(struct platform_device *op) -{ - struct parport *p = dev_get_drvdata(&op->dev); - int slot = p->dma; - - parport_pc_unregister_port(p); - - if (slot != PARPORT_DMA_NOFIFO) { - unsigned long d_base = op->resource[2].start; - unsigned long d_len; - - d_len = (op->resource[2].end - d_base) + 1UL; - - ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); - ebus_dma_unregister(&sparc_ebus_dmas[slot].info); - of_iounmap(&op->resource[2], - sparc_ebus_dmas[slot].info.regs, - d_len); - clear_bit(slot, dma_slot_map); - } - - return 0; -} - -static const struct of_device_id ecpp_match[] = { - { - .name = "ecpp", - }, - { - .name = "parallel", - .compatible = "ecpp", - }, - { - .name = "parallel", - .compatible = "ns87317-ecpp", - }, - { - .name = "parallel", - .compatible = "pnpALI,1533,3", - }, - {}, -}; - -static struct platform_driver ecpp_driver = { - .driver = { - .name = "ecpp", - .of_match_table = ecpp_match, - }, - .probe = ecpp_probe, - .remove = ecpp_remove, -}; - -static int parport_pc_find_nonpci_ports(int autoirq, int autodma) -{ - return platform_driver_register(&ecpp_driver); -} - -#endif /* !(_ASM_SPARC64_PARPORT_H */ diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h new file mode 100644 index 0000000000000..0a7ffcfd59cda --- /dev/null +++ b/arch/sparc/include/asm/parport_64.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* parport.h: sparc64 specific parport initialization and dma. + * + * Copyright (C) 1999 Eddie C. Dost (ecd@skynet.be) + */ + +#ifndef _ASM_SPARC64_PARPORT_H +#define _ASM_SPARC64_PARPORT_H 1 + +#include +#include + +#include +#include +#include + +#define PARPORT_PC_MAX_PORTS PARPORT_MAX + +/* + * While sparc64 doesn't have an ISA DMA API, we provide something that looks + * close enough to make parport_pc happy + */ +#define HAS_DMA + +#ifdef CONFIG_PARPORT_PC_FIFO +static DEFINE_SPINLOCK(dma_spin_lock); + +#define claim_dma_lock() \ +({ unsigned long flags; \ + spin_lock_irqsave(&dma_spin_lock, flags); \ + flags; \ +}) + +#define release_dma_lock(__flags) \ + spin_unlock_irqrestore(&dma_spin_lock, __flags); +#endif + +static struct sparc_ebus_info { + struct ebus_dma_info info; + unsigned int addr; + unsigned int count; + int lock; + + struct parport *port; +} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS]; + +static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS); + +static inline int request_dma(unsigned int dmanr, const char *device_id) +{ + if (dmanr >= PARPORT_PC_MAX_PORTS) + return -EINVAL; + if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0) + return -EBUSY; + return 0; +} + +static inline void free_dma(unsigned int dmanr) +{ + if (dmanr >= PARPORT_PC_MAX_PORTS) { + printk(KERN_WARNING "Trying to free DMA%d\n", dmanr); + return; + } + if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) { + printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr); + return; + } +} + +static inline void enable_dma(unsigned int dmanr) +{ + ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1); + + if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info, + sparc_ebus_dmas[dmanr].addr, + sparc_ebus_dmas[dmanr].count)) + BUG(); +} + +static inline void disable_dma(unsigned int dmanr) +{ + ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0); +} + +static inline void clear_dma_ff(unsigned int dmanr) +{ + /* nothing */ +} + +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE)); +} + +static inline void set_dma_addr(unsigned int dmanr, unsigned int addr) +{ + sparc_ebus_dmas[dmanr].addr = addr; +} + +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + sparc_ebus_dmas[dmanr].count = count; +} + +static inline unsigned int get_dma_residue(unsigned int dmanr) +{ + return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info); +} + +static int ecpp_probe(struct platform_device *op) +{ + unsigned long base = op->resource[0].start; + unsigned long config = op->resource[1].start; + unsigned long d_base = op->resource[2].start; + unsigned long d_len; + struct device_node *parent; + struct parport *p; + int slot, err; + + parent = op->dev.of_node->parent; + if (of_node_name_eq(parent, "dma")) { + p = parport_pc_probe_port(base, base + 0x400, + op->archdata.irqs[0], PARPORT_DMA_NOFIFO, + op->dev.parent->parent, 0); + if (!p) + return -ENOMEM; + dev_set_drvdata(&op->dev, p); + return 0; + } + + for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) { + if (!test_and_set_bit(slot, dma_slot_map)) + break; + } + err = -ENODEV; + if (slot >= PARPORT_PC_MAX_PORTS) + goto out_err; + + spin_lock_init(&sparc_ebus_dmas[slot].info.lock); + + d_len = (op->resource[2].end - d_base) + 1UL; + sparc_ebus_dmas[slot].info.regs = + of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA"); + + if (!sparc_ebus_dmas[slot].info.regs) + goto out_clear_map; + + sparc_ebus_dmas[slot].info.flags = 0; + sparc_ebus_dmas[slot].info.callback = NULL; + sparc_ebus_dmas[slot].info.client_cookie = NULL; + sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; + strcpy(sparc_ebus_dmas[slot].info.name, "parport"); + if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) + goto out_unmap_regs; + + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1); + + /* Configure IRQ to Push Pull, Level Low */ + /* Enable ECP, set bit 2 of the CTR first */ + outb(0x04, base + 0x02); + ns87303_modify(config, PCR, + PCR_EPP_ENABLE | + PCR_IRQ_ODRAIN, + PCR_ECP_ENABLE | + PCR_ECP_CLK_ENA | + PCR_IRQ_POLAR); + + /* CTR bit 5 controls direction of port */ + ns87303_modify(config, PTR, + 0, PTR_LPT_REG_DIR); + + p = parport_pc_probe_port(base, base + 0x400, + op->archdata.irqs[0], + slot, + op->dev.parent, + 0); + err = -ENOMEM; + if (!p) + goto out_disable_irq; + + dev_set_drvdata(&op->dev, p); + + return 0; + +out_disable_irq: + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); + ebus_dma_unregister(&sparc_ebus_dmas[slot].info); + +out_unmap_regs: + of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len); + +out_clear_map: + clear_bit(slot, dma_slot_map); + +out_err: + return err; +} + +static int ecpp_remove(struct platform_device *op) +{ + struct parport *p = dev_get_drvdata(&op->dev); + int slot = p->dma; + + parport_pc_unregister_port(p); + + if (slot != PARPORT_DMA_NOFIFO) { + unsigned long d_base = op->resource[2].start; + unsigned long d_len; + + d_len = (op->resource[2].end - d_base) + 1UL; + + ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0); + ebus_dma_unregister(&sparc_ebus_dmas[slot].info); + of_iounmap(&op->resource[2], + sparc_ebus_dmas[slot].info.regs, + d_len); + clear_bit(slot, dma_slot_map); + } + + return 0; +} + +static const struct of_device_id ecpp_match[] = { + { + .name = "ecpp", + }, + { + .name = "parallel", + .compatible = "ecpp", + }, + { + .name = "parallel", + .compatible = "ns87317-ecpp", + }, + { + .name = "parallel", + .compatible = "pnpALI,1533,3", + }, + {}, +}; + +static struct platform_driver ecpp_driver = { + .driver = { + .name = "ecpp", + .of_match_table = ecpp_match, + }, + .probe = ecpp_probe, + .remove = ecpp_remove, +}; + +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return platform_driver_register(&ecpp_driver); +} + +#endif /* !(_ASM_SPARC64_PARPORT_H */ -- GitLab From 3abc2d160ed8213948b147295d77d44a22c88fa3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2024 11:49:57 -0500 Subject: [PATCH 1280/2290] nfs: fix UAF in direct writes [ Upstream commit 17f46b803d4f23c66cacce81db35fef3adb8f2af ] In production we have been hitting the following warning consistently ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 17 PID: 1800359 at lib/refcount.c:28 refcount_warn_saturate+0x9c/0xe0 Workqueue: nfsiod nfs_direct_write_schedule_work [nfs] RIP: 0010:refcount_warn_saturate+0x9c/0xe0 PKRU: 55555554 Call Trace: ? __warn+0x9f/0x130 ? refcount_warn_saturate+0x9c/0xe0 ? report_bug+0xcc/0x150 ? handle_bug+0x3d/0x70 ? exc_invalid_op+0x16/0x40 ? asm_exc_invalid_op+0x16/0x20 ? refcount_warn_saturate+0x9c/0xe0 nfs_direct_write_schedule_work+0x237/0x250 [nfs] process_one_work+0x12f/0x4a0 worker_thread+0x14e/0x3b0 ? ZSTD_getCParams_internal+0x220/0x220 kthread+0xdc/0x120 ? __btf_name_valid+0xa0/0xa0 ret_from_fork+0x1f/0x30 This is because we're completing the nfs_direct_request twice in a row. The source of this is when we have our commit requests to submit, we process them and send them off, and then in the completion path for the commit requests we have if (nfs_commit_end(cinfo.mds)) nfs_direct_write_complete(dreq); However since we're submitting asynchronous requests we sometimes have one that completes before we submit the next one, so we end up calling complete on the nfs_direct_request twice. The only other place we use nfs_generic_commit_list() is in __nfs_commit_inode, which wraps this call in a nfs_commit_begin(); nfs_commit_end(); Which is a common pattern for this style of completion handling, one that is also repeated in the direct code with get_dreq()/put_dreq() calls around where we process events as well as in the completion paths. Fix this by using the same pattern for the commit requests. Before with my 200 node rocksdb stress running this warning would pop every 10ish minutes. With my patch the stress test has been running for several hours without popping. Signed-off-by: Josef Bacik Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/direct.c | 11 +++++++++-- fs/nfs/write.c | 2 +- include/linux/nfs_fs.h | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8fdb65e1b14a3..b555efca01d20 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -647,10 +647,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) LIST_HEAD(mds_list); nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_commit_begin(cinfo.mds); nfs_scan_commit(dreq->inode, &mds_list, &cinfo); res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); - if (res < 0) /* res == -ENOMEM */ - nfs_direct_write_reschedule(dreq); + if (res < 0) { /* res == -ENOMEM */ + spin_lock(&dreq->lock); + if (dreq->flags == 0) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + } + if (nfs_commit_end(cinfo.mds)) + nfs_direct_write_complete(dreq); } static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6a06066684172..8e21caae4cae2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1656,7 +1656,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo) !atomic_read(&cinfo->rpcs_out)); } -static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) { atomic_inc(&cinfo->rpcs_out); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7931fa4725612..ac7d799d9d387 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -582,6 +582,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline bool nfs_have_writebacks(const struct inode *inode) -- GitLab From 32eaf695d3c7346fad54be26d21de90fec2f9a81 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 5 Mar 2024 15:12:47 -0700 Subject: [PATCH 1281/2290] kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1 [ Upstream commit 75b5ab134bb5f657ef7979a59106dce0657e8d87 ] Clang enables -Wenum-enum-conversion and -Wenum-compare-conditional under -Wenum-conversion. A recent change in Clang strengthened these warnings and they appear frequently in common builds, primarily due to several instances in common headers but there are quite a few drivers that have individual instances as well. include/linux/vmstat.h:508:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 508 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 509 | item]; | ~~~~ drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:955:24: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional] 955 | flags |= is_new_rate ? IWL_MAC_BEACON_CCK | ^ ~~~~~~~~~~~~~~~~~~ 956 | : IWL_MAC_BEACON_CCK_V1; | ~~~~~~~~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:1120:21: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional] 1120 | 0) > 10 ? | ^ 1121 | IWL_MAC_BEACON_FILS : | ~~~~~~~~~~~~~~~~~~~ 1122 | IWL_MAC_BEACON_FILS_V1; | ~~~~~~~~~~~~~~~~~~~~~~ Doing arithmetic between or returning two different types of enums could be a bug, so each of the instance of the warning needs to be evaluated. Unfortunately, as mentioned above, there are many instances of this warning in many different configurations, which can break the build when CONFIG_WERROR is enabled. To avoid introducing new instances of the warnings while cleaning up the disruption for the majority of users, disable these warnings for the default build while leaving them on for W=1 builds. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2002 Link: https://github.com/llvm/llvm-project/commit/8c2ae42b3e1c6aa7c18f873edcebff7c0b45a37e Acked-by: Yonghong Song Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Makefile.extrawarn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6bbba36c59695..fa5ef41806882 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -65,6 +65,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) +KBUILD_CFLAGS += -Wno-enum-compare-conditional +KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif -- GitLab From a601e7a7fc8e9b80a78339d4d30335b2fde0ba21 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 16 Mar 2023 13:41:12 +0530 Subject: [PATCH 1282/2290] PCI: qcom: Rename qcom_pcie_config_sid_sm8250() to reflect IP version [ Upstream commit 1f70939871b260b52e9d1941f1cad740b7295c2c ] qcom_pcie_config_sid_sm8250() function no longer applies only to SM8250. So let's rename it to reflect the actual IP version and also move its definition to keep it sorted as per IP revisions. Link: https://lore.kernel.org/r/20230316081117.14288-15-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Lorenzo Pieralisi Stable-dep-of: bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly") Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-qcom.c | 143 ++++++++++++------------- 1 file changed, 71 insertions(+), 72 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 0ccd92faf078a..9202d2395b507 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1312,6 +1312,76 @@ static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie) regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies); } +static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie) +{ + /* iommu map structure */ + struct { + u32 bdf; + u32 phandle; + u32 smmu_sid; + u32 smmu_sid_len; + } *map; + void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N; + struct device *dev = pcie->pci->dev; + u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; + int i, nr_map, size = 0; + u32 smmu_sid_base; + + of_get_property(dev->of_node, "iommu-map", &size); + if (!size) + return 0; + + map = kzalloc(size, GFP_KERNEL); + if (!map) + return -ENOMEM; + + of_property_read_u32_array(dev->of_node, "iommu-map", (u32 *)map, + size / sizeof(u32)); + + nr_map = size / (sizeof(*map)); + + crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL); + + /* Registers need to be zero out first */ + memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32)); + + /* Extract the SMMU SID base from the first entry of iommu-map */ + smmu_sid_base = map[0].smmu_sid; + + /* Look for an available entry to hold the mapping */ + for (i = 0; i < nr_map; i++) { + __be16 bdf_be = cpu_to_be16(map[i].bdf); + u32 val; + u8 hash; + + hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), 0); + + val = readl(bdf_to_sid_base + hash * sizeof(u32)); + + /* If the register is already populated, look for next available entry */ + while (val) { + u8 current_hash = hash++; + u8 next_mask = 0xff; + + /* If NEXT field is NULL then update it with next hash */ + if (!(val & next_mask)) { + val |= (u32)hash; + writel(val, bdf_to_sid_base + current_hash * sizeof(u32)); + } + + val = readl(bdf_to_sid_base + hash * sizeof(u32)); + } + + /* BDF [31:16] | SID [15:8] | NEXT [7:0] */ + val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0; + writel(val, bdf_to_sid_base + hash * sizeof(u32)); + } + + kfree(map); + + return 0; +} + static int qcom_pcie_get_resources_2_9_0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_2_9_0 *res = &pcie->res.v2_9_0; @@ -1429,77 +1499,6 @@ static int qcom_pcie_link_up(struct dw_pcie *pci) return !!(val & PCI_EXP_LNKSTA_DLLLA); } -static int qcom_pcie_config_sid_sm8250(struct qcom_pcie *pcie) -{ - /* iommu map structure */ - struct { - u32 bdf; - u32 phandle; - u32 smmu_sid; - u32 smmu_sid_len; - } *map; - void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N; - struct device *dev = pcie->pci->dev; - u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; - int i, nr_map, size = 0; - u32 smmu_sid_base; - - of_get_property(dev->of_node, "iommu-map", &size); - if (!size) - return 0; - - map = kzalloc(size, GFP_KERNEL); - if (!map) - return -ENOMEM; - - of_property_read_u32_array(dev->of_node, - "iommu-map", (u32 *)map, size / sizeof(u32)); - - nr_map = size / (sizeof(*map)); - - crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL); - - /* Registers need to be zero out first */ - memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32)); - - /* Extract the SMMU SID base from the first entry of iommu-map */ - smmu_sid_base = map[0].smmu_sid; - - /* Look for an available entry to hold the mapping */ - for (i = 0; i < nr_map; i++) { - __be16 bdf_be = cpu_to_be16(map[i].bdf); - u32 val; - u8 hash; - - hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), - 0); - - val = readl(bdf_to_sid_base + hash * sizeof(u32)); - - /* If the register is already populated, look for next available entry */ - while (val) { - u8 current_hash = hash++; - u8 next_mask = 0xff; - - /* If NEXT field is NULL then update it with next hash */ - if (!(val & next_mask)) { - val |= (u32)hash; - writel(val, bdf_to_sid_base + current_hash * sizeof(u32)); - } - - val = readl(bdf_to_sid_base + hash * sizeof(u32)); - } - - /* BDF [31:16] | SID [15:8] | NEXT [7:0] */ - val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0; - writel(val, bdf_to_sid_base + hash * sizeof(u32)); - } - - kfree(map); - - return 0; -} - static int qcom_pcie_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); @@ -1616,7 +1615,7 @@ static const struct qcom_pcie_ops ops_1_9_0 = { .init = qcom_pcie_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, - .config_sid = qcom_pcie_config_sid_sm8250, + .config_sid = qcom_pcie_config_sid_1_9_0, }; /* Qcom IP rev.: 2.9.0 Synopsys IP rev.: 5.00a */ -- GitLab From 04f3652bd0fdad4f3486db6de59aa01d23eb01cb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 7 Mar 2024 16:35:15 +0530 Subject: [PATCH 1283/2290] PCI: qcom: Enable BDF to SID translation properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bf79e33cdd89db498e00a6131e937259de5f2705 ] Qcom SoCs making use of ARM SMMU require BDF to SID translation table in the driver to properly map the SID for the PCIe devices based on their BDF identifier. This is currently achieved with the help of qcom_pcie_config_sid_1_9_0() function for SoCs supporting the 1_9_0 config. But With newer Qcom SoCs starting from SM8450, BDF to SID translation is set to bypass mode by default in hardware. Due to this, the translation table that is set in the qcom_pcie_config_sid_1_9_0() is essentially unused and the default SID is used for all endpoints in SoCs starting from SM8450. This is a security concern and also warrants swapping the DeviceID in DT while using the GIC ITS to handle MSIs from endpoints. The swapping is currently done like below in DT when using GIC ITS: /* * MSIs for BDF (1:0.0) only works with Device ID 0x5980. * Hence, the IDs are swapped. */ msi-map = <0x0 &gic_its 0x5981 0x1>, <0x100 &gic_its 0x5980 0x1>; Here, swapping of the DeviceIDs ensure that the endpoint with BDF (1:0.0) gets the DeviceID 0x5980 which is associated with the default SID as per the iommu mapping in DT. So MSIs were delivered with IDs swapped so far. But this also means the Root Port (0:0.0) won't receive any MSIs (for PME, AER etc...) So let's fix these issues by clearing the BDF to SID bypass mode for all SoCs making use of the 1_9_0 config. This allows the PCIe devices to use the correct SID, thus avoiding the DeviceID swapping hack in DT and also achieving the isolation between devices. Fixes: 4c9398822106 ("PCI: qcom: Add support for configuring BDF to SID mapping for SM8250") Link: https://lore.kernel.org/linux-pci/20240307-pci-bdf-sid-fix-v1-1-9423a7e2d63c@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org # 5.11 Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-qcom.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 9202d2395b507..0bad23ec53ee8 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -50,6 +50,7 @@ #define PARF_SLV_ADDR_SPACE_SIZE 0x358 #define PARF_DEVICE_TYPE 0x1000 #define PARF_BDF_TO_SID_TABLE_N 0x2000 +#define PARF_BDF_TO_SID_CFG 0x2c00 /* ELBI registers */ #define ELBI_SYS_CTRL 0x04 @@ -102,6 +103,9 @@ /* PARF_DEVICE_TYPE register fields */ #define DEVICE_TYPE_RC 0x4 +/* PARF_BDF_TO_SID_CFG fields */ +#define BDF_TO_SID_BYPASS BIT(0) + /* ELBI_SYS_CTRL register fields */ #define ELBI_SYS_CTRL_LT_ENABLE BIT(0) @@ -1326,11 +1330,17 @@ static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie) u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE]; int i, nr_map, size = 0; u32 smmu_sid_base; + u32 val; of_get_property(dev->of_node, "iommu-map", &size); if (!size) return 0; + /* Enable BDF to SID translation by disabling bypass mode (default) */ + val = readl(pcie->parf + PARF_BDF_TO_SID_CFG); + val &= ~BDF_TO_SID_BYPASS; + writel(val, pcie->parf + PARF_BDF_TO_SID_CFG); + map = kzalloc(size, GFP_KERNEL); if (!map) return -ENOMEM; -- GitLab From 3d863cf207974343457c45b5c2d2d3baabd83e26 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 7 Mar 2024 12:15:20 +0100 Subject: [PATCH 1284/2290] PCI: dwc: endpoint: Fix advertised resizable BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 72e34b8593e08a0ee759b7a038e0b178418ea6f8 ] The commit message in commit fc9a77040b04 ("PCI: designware-ep: Configure Resizable BAR cap to advertise the smallest size") claims that it modifies the Resizable BAR capability to only advertise support for 1 MB size BARs. However, the commit writes all zeroes to PCI_REBAR_CAP (the register which contains the possible BAR sizes that a BAR be resized to). According to the spec, it is illegal to not have a bit set in PCI_REBAR_CAP, and 1 MB is the smallest size allowed. Set bit 4 in PCI_REBAR_CAP, so that we actually advertise support for a 1 MB BAR size. Before: Capabilities: [2e8 v1] Physical Resizable BAR BAR 0: current size: 1MB BAR 1: current size: 1MB BAR 2: current size: 1MB BAR 3: current size: 1MB BAR 4: current size: 1MB BAR 5: current size: 1MB After: Capabilities: [2e8 v1] Physical Resizable BAR BAR 0: current size: 1MB, supported: 1MB BAR 1: current size: 1MB, supported: 1MB BAR 2: current size: 1MB, supported: 1MB BAR 3: current size: 1MB, supported: 1MB BAR 4: current size: 1MB, supported: 1MB BAR 5: current size: 1MB, supported: 1MB Fixes: fc9a77040b04 ("PCI: designware-ep: Configure Resizable BAR cap to advertise the smallest size") Link: https://lore.kernel.org/linux-pci/20240307111520.3303774-1-cassel@kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Cc: # 5.2 Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-designware-ep.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 4086a7818981a..506d6d061d4cd 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -669,8 +669,13 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; + /* + * PCIe r6.0, sec 7.8.6.2 require us to support at least one + * size in the range from 1 MB to 512 GB. Advertise support + * for 1 MB BAR size only. + */ for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) - dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, BIT(4)); } dw_pcie_setup(pci); -- GitLab From 4732ac1c23b5a9c755db7d4a613a757f93f61808 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 16 Feb 2024 12:22:40 -0800 Subject: [PATCH 1285/2290] PCI: hv: Fix ring buffer size calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b5ff74c1ef50fe08e384026875fec660fadfaedd ] For a physical PCI device that is passed through to a Hyper-V guest VM, current code specifies the VMBus ring buffer size as 4 pages. But this is an inappropriate dependency, since the amount of ring buffer space needed is unrelated to PAGE_SIZE. For example, on x86 the ring buffer size ends up as 16 Kbytes, while on ARM64 with 64 Kbyte pages, the ring size bloats to 256 Kbytes. The ring buffer for PCI pass-thru devices is used for only a few messages during device setup and removal, so any space above a few Kbytes is wasted. Fix this by declaring the ring buffer size to be a fixed 16 Kbytes. Furthermore, use the VMBUS_RING_SIZE() macro so that the ring buffer header is properly accounted for, and so the size is rounded up to a page boundary, using the page size for which the kernel is built. While w/64 Kbyte pages this results in a 64 Kbyte ring buffer header plus a 64 Kbyte ring buffer, that's the smallest possible with that page size. It's still 128 Kbytes better than the current code. Link: https://lore.kernel.org/linux-pci/20240216202240.251818-1-mhklinux@outlook.com Signed-off-by: Michael Kelley Signed-off-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Jarvinen Reviewed-by: Long Li Cc: # 5.15.x Signed-off-by: Sasha Levin --- drivers/pci/controller/pci-hyperv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9693bab59bf7c..b36cbc9136ae1 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -49,6 +49,7 @@ #include #include #include +#include #include /* @@ -465,7 +466,7 @@ struct pci_eject_response { u32 status; } __packed; -static int pci_ring_size = (4 * PAGE_SIZE); +static int pci_ring_size = VMBUS_RING_SIZE(SZ_16K); /* * Driver specific state. -- GitLab From 0f1e6cd8fb3c36ad0ea7f120c9c5445a1c23aa54 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 8 Jan 2023 17:44:24 +0200 Subject: [PATCH 1286/2290] vfio: Use GFP_KERNEL_ACCOUNT for userspace persistent allocations [ Upstream commit 0886196ca8810c5b1f5097b71c4bc0df40b10208 ] Use GFP_KERNEL_ACCOUNT for userspace persistent allocations. The GFP_KERNEL_ACCOUNT option lets the memory allocator know that this is untrusted allocation triggered from userspace and should be a subject of kmem accounting, and as such it is controlled by the cgroup mechanism. The way to find the relevant allocations was for example to look at the close_device function and trace back all the kfrees to their allocations. Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230108154427.32609-4-yishaih@nvidia.com Signed-off-by: Alex Williamson Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") Signed-off-by: Sasha Levin --- drivers/vfio/container.c | 2 +- drivers/vfio/pci/vfio_pci_config.c | 6 +++--- drivers/vfio/pci/vfio_pci_core.c | 7 ++++--- drivers/vfio/pci/vfio_pci_igd.c | 2 +- drivers/vfio/pci/vfio_pci_intrs.c | 10 ++++++---- drivers/vfio/pci/vfio_pci_rdwr.c | 2 +- drivers/vfio/virqfd.c | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d74164abbf401..ab9d8e3481f75 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -366,7 +366,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) { struct vfio_container *container; - container = kzalloc(sizeof(*container), GFP_KERNEL); + container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); if (!container) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 4a350421c5f62..523e0144c86fa 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1244,7 +1244,7 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos) if (vdev->msi_perm) return len; - vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL); + vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL_ACCOUNT); if (!vdev->msi_perm) return -ENOMEM; @@ -1731,11 +1731,11 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) * no requirements on the length of a capability, so the gap between * capabilities needs byte granularity. */ - map = kmalloc(pdev->cfg_size, GFP_KERNEL); + map = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!map) return -ENOMEM; - vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL); + vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!vconfig) { kfree(map); return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index e030c2120183e..f357fd157e1ed 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -141,7 +141,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) * of the exclusive page in case that hot-add * device's bar is assigned into it. */ - dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL); + dummy_res = + kzalloc(sizeof(*dummy_res), GFP_KERNEL_ACCOUNT); if (dummy_res == NULL) goto no_mmap; @@ -856,7 +857,7 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, region = krealloc(vdev->region, (vdev->num_regions + 1) * sizeof(*region), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!region) return -ENOMEM; @@ -1637,7 +1638,7 @@ static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, { struct vfio_pci_mmap_vma *mmap_vma; - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL); + mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); if (!mmap_vma) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 5e6ca59269548..dd70e2431bd74 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -180,7 +180,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) if (!addr || !(~addr)) return -ENODEV; - opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL); + opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL_ACCOUNT); if (!opregionvbt) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 40c3d7cf163f6..bffb0741518b9 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -177,7 +177,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev) if (!vdev->pdev->irq) return -ENODEV; - vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -216,7 +216,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) if (fd < 0) /* Disable only */ return 0; - vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", + vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); if (!vdev->ctx[0].name) return -ENOMEM; @@ -284,7 +284,8 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi if (!is_irq_none(vdev)) return -EINVAL; - vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), + GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -343,7 +344,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, if (fd < 0) return 0; - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", + vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT, + "vfio-msi%s[%d](%s)", msix ? "x" : "", vector, pci_name(pdev)); if (!vdev->ctx[vector].name) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e352a033b4aef..e27de61ac9fe7 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -470,7 +470,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, goto out_unlock; } - ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL); + ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT); if (!ioeventfd) { ret = -ENOMEM; goto out_unlock; diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 414e98d82b02e..a928c68df4763 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -115,7 +115,7 @@ int vfio_virqfd_enable(void *opaque, int ret = 0; __poll_t events; - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); + virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT); if (!virqfd) return -ENOMEM; -- GitLab From dcc6b99c61ec6ae87bab2aaff2cd98f8c47c754a Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 11 May 2023 08:44:28 -0700 Subject: [PATCH 1287/2290] vfio/pci: Consolidate irq cleanup on MSI/MSI-X disable [ Upstream commit a65f35cfd504e5135540939cffd4323083190b36 ] vfio_msi_disable() releases all previously allocated state associated with each interrupt before disabling MSI/MSI-X. vfio_msi_disable() iterates twice over the interrupt state: first directly with a for loop to do virqfd cleanup, followed by another for loop within vfio_msi_set_block() that removes the interrupt handler and its associated state using vfio_msi_set_vector_signal(). Simplify interrupt cleanup by iterating over allocated interrupts once. Signed-off-by: Reinette Chatre Reviewed-by: Kevin Tian Acked-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/837acb8cbe86a258a50da05e56a1f17c1a19abbe.1683740667.git.reinette.chatre@intel.com Signed-off-by: Alex Williamson Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_intrs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index bffb0741518b9..6a9c6a143cc3a 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -426,10 +426,9 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) for (i = 0; i < vdev->num_ctx; i++) { vfio_virqfd_disable(&vdev->ctx[i].unmask); vfio_virqfd_disable(&vdev->ctx[i].mask); + vfio_msi_set_vector_signal(vdev, i, -1, msix); } - vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); - cmd = vfio_pci_memory_lock_and_enable(vdev); pci_free_irq_vectors(pdev); vfio_pci_memory_unlock_and_restore(vdev, cmd); -- GitLab From f56ba2d484341c0397df896070c55800252461cb Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 11 May 2023 08:44:29 -0700 Subject: [PATCH 1288/2290] vfio/pci: Remove negative check on unsigned vector [ Upstream commit 6578ed85c7d63693669bfede01e0237d0e24211a ] User space provides the vector as an unsigned int that is checked early for validity (vfio_set_irqs_validate_and_prepare()). A later negative check of the provided vector is not necessary. Remove the negative check and ensure the type used for the vector is consistent as an unsigned int. Signed-off-by: Reinette Chatre Reviewed-by: Kevin Tian Acked-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/28521e1b0b091849952b0ecb8c118729fc8cdc4f.1683740667.git.reinette.chatre@intel.com Signed-off-by: Alex Williamson Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_intrs.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 6a9c6a143cc3a..258de57ef9564 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -317,14 +317,14 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi } static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, - int vector, int fd, bool msix) + unsigned int vector, int fd, bool msix) { struct pci_dev *pdev = vdev->pdev; struct eventfd_ctx *trigger; int irq, ret; u16 cmd; - if (vector < 0 || vector >= vdev->num_ctx) + if (vector >= vdev->num_ctx) return -EINVAL; irq = pci_irq_vector(pdev, vector); @@ -399,7 +399,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, unsigned count, int32_t *fds, bool msix) { - int i, j, ret = 0; + unsigned int i, j; + int ret = 0; if (start >= vdev->num_ctx || start + count > vdev->num_ctx) return -EINVAL; @@ -410,8 +411,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, } if (ret) { - for (--j; j >= (int)start; j--) - vfio_msi_set_vector_signal(vdev, j, -1, msix); + for (i = start; i < j; i++) + vfio_msi_set_vector_signal(vdev, i, -1, msix); } return ret; @@ -420,7 +421,7 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) { struct pci_dev *pdev = vdev->pdev; - int i; + unsigned int i; u16 cmd; for (i = 0; i < vdev->num_ctx; i++) { @@ -542,7 +543,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - int i; + unsigned int i; bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false; if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) { -- GitLab From 3fe0ac10bd117df847c93408a9d428a453cd60e5 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 8 Mar 2024 16:05:23 -0700 Subject: [PATCH 1289/2290] vfio/pci: Lock external INTx masking ops [ Upstream commit 810cd4bb53456d0503cc4e7934e063835152c1b7 ] Mask operations through config space changes to DisINTx may race INTx configuration changes via ioctl. Create wrappers that add locking for paths outside of the core interrupt code. In particular, irq_type is updated holding igate, therefore testing is_intx() requires holding igate. For example clearing DisINTx from config space can otherwise race changes of the interrupt configuration. This aligns interfaces which may trigger the INTx eventfd into two camps, one side serialized by igate and the other only enabled while INTx is configured. A subsequent patch introduces synchronization for the latter flows. Cc: Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reported-by: Reinette Chatre Reviewed-by: Kevin Tian Reviewed-by: Reinette Chatre Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-3-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_intrs.c | 34 +++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 258de57ef9564..8c8b04d858454 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -60,12 +60,14 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */ -bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; unsigned long flags; bool masked_changed = false; + lockdep_assert_held(&vdev->igate); + spin_lock_irqsave(&vdev->irqlock, flags); /* @@ -95,6 +97,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) return masked_changed; } +bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +{ + bool mask_changed; + + mutex_lock(&vdev->igate); + mask_changed = __vfio_pci_intx_mask(vdev); + mutex_unlock(&vdev->igate); + + return mask_changed; +} + /* * If this is triggered by an eventfd, we can't call eventfd_signal * or else we'll deadlock on the eventfd wait queue. Return >0 when @@ -137,12 +150,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) return ret; } -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) { + lockdep_assert_held(&vdev->igate); + if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) vfio_send_intx_eventfd(vdev, NULL); } +void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +{ + mutex_lock(&vdev->igate); + __vfio_pci_intx_unmask(vdev); + mutex_unlock(&vdev->igate); +} + static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { struct vfio_pci_core_device *vdev = dev_id; @@ -457,11 +479,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t unmask = *(uint8_t *)data; if (unmask) - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) @@ -484,11 +506,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t mask = *(uint8_t *)data; if (mask) - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { return -ENOTTY; /* XXX implement me */ } -- GitLab From 4ee09d4099bbed3c7d211102a3a5e7410ce87fb3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 8 Mar 2024 16:05:26 -0700 Subject: [PATCH 1290/2290] vfio/platform: Disable virqfds on cleanup [ Upstream commit fcdc0d3d40bc26c105acf8467f7d9018970944ae ] irqfds for mask and unmask that are not specifically disabled by the user are leaked. Remove any irqfds during cleanup Cc: Eric Auger Cc: Fixes: a7fa7c77cf15 ("vfio/platform: implement IRQ masking/unmasking via an eventfd") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-6-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/platform/vfio_platform_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index c5b09ec0a3c98..f2893f2fcaabd 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -321,8 +321,11 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) { int i; - for (i = 0; i < vdev->num_irqs; i++) + for (i = 0; i < vdev->num_irqs; i++) { + vfio_virqfd_disable(&vdev->irqs[i].mask); + vfio_virqfd_disable(&vdev->irqs[i].unmask); vfio_set_trigger(vdev, i, -1, NULL); + } vdev->num_irqs = 0; kfree(vdev->irqs); -- GitLab From 2952d0db18cfcbca0ddeffd69eea9a3a5f17de18 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Thu, 22 Feb 2024 10:58:21 +0100 Subject: [PATCH 1291/2290] ksmbd: retrieve number of blocks using vfs_getattr in set_file_allocation_info [ Upstream commit 34cd86b6632718b7df3999d96f51e63de41c5e4f ] Use vfs_getattr() to retrieve stat information, rather than make assumptions about how a filesystem fills inode structs. Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 66d25d0e34d8b..39fc078284c8e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5757,15 +5757,21 @@ static int set_file_allocation_info(struct ksmbd_work *work, loff_t alloc_blks; struct inode *inode; + struct kstat stat; int rc; if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); + if (rc) + return rc; + alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9; inode = file_inode(fp->filp); - if (alloc_blks > inode->i_blocks) { + if (alloc_blks > stat.blocks) { smb_break_all_levII_oplock(work, fp, 1); rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0, alloc_blks * 512); @@ -5773,7 +5779,7 @@ static int set_file_allocation_info(struct ksmbd_work *work, pr_err("vfs_fallocate is failed : %d\n", rc); return rc; } - } else if (alloc_blks < inode->i_blocks) { + } else if (alloc_blks < stat.blocks) { loff_t size; /* -- GitLab From 6615ef6e3166a7ac4b23f87bc4d2f42e9dce2d3e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Mar 2024 15:24:03 -0500 Subject: [PATCH 1292/2290] ring-buffer: Fix waking up ring buffer readers [ Upstream commit b3594573681b53316ec0365332681a30463edfd6 ] A task can wait on a ring buffer for when it fills up to a specific watermark. The writer will check the minimum watermark that waiters are waiting for and if the ring buffer is past that, it will wake up all the waiters. The waiters are in a wait loop, and will first check if a signal is pending and then check if the ring buffer is at the desired level where it should break out of the loop. If a file that uses a ring buffer closes, and there's threads waiting on the ring buffer, it needs to wake up those threads. To do this, a "wait_index" was used. Before entering the wait loop, the waiter will read the wait_index. On wakeup, it will check if the wait_index is different than when it entered the loop, and will exit the loop if it is. The waker will only need to update the wait_index before waking up the waiters. This had a couple of bugs. One trivial one and one broken by design. The trivial bug was that the waiter checked the wait_index after the schedule() call. It had to be checked between the prepare_to_wait() and the schedule() which it was not. The main bug is that the first check to set the default wait_index will always be outside the prepare_to_wait() and the schedule(). That's because the ring_buffer_wait() doesn't have enough context to know if it should break out of the loop. The loop itself is not needed, because all the callers to the ring_buffer_wait() also has their own loop, as the callers have a better sense of what the context is to decide whether to break out of the loop or not. Just have the ring_buffer_wait() block once, and if it gets woken up, exit the function and let the callers decide what to do next. Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNSRZfg@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.792933613@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice") Signed-off-by: Steven Rostedt (Google) Stable-dep-of: 761d9473e27f ("ring-buffer: Do not set shortest_full when full target is hit") Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 139 ++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 71 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e019a9278794f..3c4d62f499505 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -414,7 +414,6 @@ struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; - long wait_index; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; @@ -949,14 +948,40 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &cpu_buffer->irq_work; } - rbwork->wait_index++; - /* make sure the waiters see the new index */ - smp_wmb(); - /* This can be called in any context */ irq_work_queue(&rbwork->work); } +static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + bool ret = false; + + /* Reads of all CPUs always waits for any data */ + if (cpu == RING_BUFFER_ALL_CPUS) + return !ring_buffer_empty(buffer); + + cpu_buffer = buffer->buffers[cpu]; + + if (!ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + return true; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + ret = !pagebusy && full_hit(buffer, cpu, full); + + if (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full) + cpu_buffer->shortest_full = full; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + return ret; +} + /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on @@ -972,7 +997,6 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); struct rb_irq_work *work; - long wait_index; int ret = 0; /* @@ -991,81 +1015,54 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) work = &cpu_buffer->irq_work; } - wait_index = READ_ONCE(work->wait_index); - - while (true) { - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. - */ - if (full) - work->full_waiters_pending = true; - else - work->waiters_pending = true; - - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) - break; - - if (cpu != RING_BUFFER_ALL_CPUS && - !ring_buffer_empty_cpu(buffer, cpu)) { - unsigned long flags; - bool pagebusy; - bool done; - - if (!full) - break; - - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - done = !pagebusy && full_hit(buffer, cpu, full); + if (full) + prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); + else + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (done) - break; - } + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + if (full) + work->full_waiters_pending = true; + else + work->waiters_pending = true; - schedule(); + if (rb_watermark_hit(buffer, cpu, full)) + goto out; - /* Make sure to see the new wait index */ - smp_rmb(); - if (wait_index != work->wait_index) - break; + if (signal_pending(current)) { + ret = -EINTR; + goto out; } + schedule(); + out: if (full) finish_wait(&work->full_waiters, &wait); else finish_wait(&work->waiters, &wait); + if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current)) + ret = -EINTR; + return ret; } -- GitLab From 1df7dcfb8af3a89cca5f2068fa889c823925cc51 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 11:56:41 -0400 Subject: [PATCH 1293/2290] ring-buffer: Do not set shortest_full when full target is hit [ Upstream commit 761d9473e27f0c8782895013a3e7b52a37c8bcfc ] The rb_watermark_hit() checks if the amount of data in the ring buffer is above the percentage level passed in by the "full" variable. If it is, it returns true. But it also sets the "shortest_full" field of the cpu_buffer that informs writers that it needs to call the irq_work if the amount of data on the ring buffer is above the requested amount. The rb_watermark_hit() always sets the shortest_full even if the amount in the ring buffer is what it wants. As it is not going to wait, because it has what it wants, there's no reason to set shortest_full. Link: https://lore.kernel.org/linux-trace-kernel/20240312115641.6aa8ba08@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Fixes: 42fb0a1e84ff5 ("tracing/ring-buffer: Have polling block on watermark") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3c4d62f499505..c934839f625df 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -974,9 +974,10 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; ret = !pagebusy && full_hit(buffer, cpu, full); - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; + if (!ret && (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full)) { + cpu_buffer->shortest_full = full; + } raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } return ret; -- GitLab From 6c2f0e055085ce6d84ae8f3e9dd3de519d96d5df Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Mar 2024 15:24:04 -0500 Subject: [PATCH 1294/2290] ring-buffer: Fix resetting of shortest_full [ Upstream commit 68282dd930ea38b068ce2c109d12405f40df3f93 ] The "shortest_full" variable is used to keep track of the waiter that is waiting for the smallest amount on the ring buffer before being woken up. When a tasks waits on the ring buffer, it passes in a "full" value that is a percentage. 0 means wake up on any data. 1-100 means wake up from 1% to 100% full buffer. As all waiters are on the same wait queue, the wake up happens for the waiter with the smallest percentage. The problem is that the smallest_full on the cpu_buffer that stores the smallest amount doesn't get reset when all the waiters are woken up. It does get reset when the ring buffer is reset (echo > /sys/kernel/tracing/trace). This means that tasks may be woken up more often then when they want to be. Instead, have the shortest_full field get reset just before waking up all the tasks. If the tasks wait again, they will update the shortest_full before sleeping. Also add locking around setting of shortest_full in the poll logic, and change "work" to "rbwork" to match the variable name for rb_irq_work structures that are used in other places. Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.948914369@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Signed-off-by: Steven Rostedt (Google) Stable-dep-of: 8145f1c35fa6 ("ring-buffer: Fix full_waiters_pending in poll") Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c934839f625df..3b50e17e2c9ab 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -907,8 +907,19 @@ static void rb_wake_up_waiters(struct irq_work *work) wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { + /* Only cpu_buffer sets the above flags */ + struct ring_buffer_per_cpu *cpu_buffer = + container_of(rbwork, struct ring_buffer_per_cpu, irq_work); + + /* Called from interrupt context */ + raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; + + /* Waking up all waiters, they will reset the shortest full */ + cpu_buffer->shortest_full = 0; + raw_spin_unlock(&cpu_buffer->reader_lock); + wake_up_all(&rbwork->full_waiters); } } @@ -1086,28 +1097,33 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; - struct rb_irq_work *work; + struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; + rbwork = &cpu_buffer->irq_work; } if (full) { - poll_wait(filp, &work->full_waiters, poll_table); - work->full_waiters_pending = true; + unsigned long flags; + + poll_wait(filp, &rbwork->full_waiters, poll_table); + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + rbwork->full_waiters_pending = true; if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } else { - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; } /* -- GitLab From 6e0f7e6fb1c40c4df421ece7fabdbbd15cb5c326 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 09:19:20 -0400 Subject: [PATCH 1295/2290] ring-buffer: Fix full_waiters_pending in poll [ Upstream commit 8145f1c35fa648da662078efab299c4467b85ad5 ] If a reader of the ring buffer is doing a poll, and waiting for the ring buffer to hit a specific watermark, there could be a case where it gets into an infinite ping-pong loop. The poll code has: rbwork->full_waiters_pending = true; if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; The writer will see full_waiters_pending and check if the ring buffer is filled over the percentage of the shortest_full value. If it is, it calls an irq_work to wake up all the waiters. But the code could get into a circular loop: CPU 0 CPU 1 ----- ----- [ Poll ] [ shortest_full = 0 ] rbwork->full_waiters_pending = true; if (rbwork->full_waiters_pending && [ buffer percent ] > shortest_full) { rbwork->wakeup_full = true; [ queue_irqwork ] cpu_buffer->shortest_full = full; [ IRQ work ] if (rbwork->wakeup_full) { cpu_buffer->shortest_full = 0; wakeup poll waiters; [woken] if ([ buffer percent ] > full) break; rbwork->full_waiters_pending = true; if (rbwork->full_waiters_pending && [ buffer percent ] > shortest_full) { rbwork->wakeup_full = true; [ queue_irqwork ] cpu_buffer->shortest_full = full; [ IRQ work ] if (rbwork->wakeup_full) { cpu_buffer->shortest_full = 0; wakeup poll waiters; [woken] [ Wash, rinse, repeat! ] In the poll, the shortest_full needs to be set before the full_pending_waiters, as once that is set, the writer will compare the current shortest_full (which is incorrect) to decide to call the irq_work, which will reset the shortest_full (expecting the readers to update it). Also move the setting of full_waiters_pending after the check if the ring buffer has the required percentage filled. There's no reason to tell the writer to wake up waiters if there are no waiters. Link: https://lore.kernel.org/linux-trace-kernel/20240312131952.630922155@goodmis.org Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 42fb0a1e84ff5 ("tracing/ring-buffer: Have polling block on watermark") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3b50e17e2c9ab..e07f45d1890d3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1116,16 +1116,32 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, poll_wait(filp, &rbwork->full_waiters, poll_table); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - rbwork->full_waiters_pending = true; if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - } else { - poll_wait(filp, &rbwork->waiters, poll_table); - rbwork->waiters_pending = true; + if (full_hit(buffer, cpu, full)) + return EPOLLIN | EPOLLRDNORM; + /* + * Only allow full_waiters_pending update to be seen after + * the shortest_full is set. If the writer sees the + * full_waiters_pending flag set, it will compare the + * amount in the ring buffer to shortest_full. If the amount + * in the ring buffer is greater than the shortest_full + * percent, it will call the irq_work handler to wake up + * this list. The irq_handler will reset shortest_full + * back to zero. That's done under the reader_lock, but + * the below smp_mb() makes sure that the update to + * full_waiters_pending doesn't leak up into the above. + */ + smp_mb(); + rbwork->full_waiters_pending = true; + return 0; } + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -1141,9 +1157,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); - if (full) - return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; -- GitLab From b1cf18e5bd871498be689c8c471b66e5e0d08655 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 08:15:07 -0400 Subject: [PATCH 1296/2290] ring-buffer: Use wait_event_interruptible() in ring_buffer_wait() [ Upstream commit 7af9ded0c2caac0a95f33df5cb04706b0f502588 ] Convert ring_buffer_wait() over to wait_event_interruptible(). The default condition is to execute the wait loop inside __wait_event() just once. This does not change the ring_buffer_wait() prototype yet, but restructures the code so that it can take a "cond" and "data" parameter and will call wait_event_interruptible() with a helper function as the condition. The helper function (rb_wait_cond) takes the cond function and data parameters. It will first check if the buffer hit the watermark defined by the "full" parameter and then call the passed in condition parameter. If either are true, it returns true. If rb_wait_cond() does not return true, it will set the appropriate "waiters_pending" flag and returns false. Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wgsNgewHFxZAJiAQznwPMqEtQmi1waeS2O1v6L4c_Um5A@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240312121703.399598519@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 116 +++++++++++++++++++++--------------- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3c7d295746f67..3e7bfc0f65aee 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -98,6 +98,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e07f45d1890d3..431a922e5c89e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -994,43 +994,15 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) return ret; } -/** - * ring_buffer_wait - wait for input to the ring buffer - * @buffer: buffer to wait on - * @cpu: the cpu buffer to wait on - * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS - * - * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon - * as data is added to any of the @buffer's cpu buffers. Otherwise - * it will wait for data to be added to a specific cpu buffer. - */ -int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +static inline bool +rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, + int cpu, int full, ring_buffer_cond_fn cond, void *data) { - struct ring_buffer_per_cpu *cpu_buffer; - DEFINE_WAIT(wait); - struct rb_irq_work *work; - int ret = 0; - - /* - * Depending on what the caller is waiting for, either any - * data in any cpu buffer, or a specific buffer, put the - * caller on the appropriate wait queue. - */ - if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; - /* Full only makes sense on per cpu reads */ - full = 0; - } else { - if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -ENODEV; - cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; - } + if (rb_watermark_hit(buffer, cpu, full)) + return true; - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); + if (cond(data)) + return true; /* * The events can happen in critical sections where @@ -1053,27 +1025,75 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * a task has been queued. It's OK for spurious wake ups. */ if (full) - work->full_waiters_pending = true; + rbwork->full_waiters_pending = true; else - work->waiters_pending = true; + rbwork->waiters_pending = true; - if (rb_watermark_hit(buffer, cpu, full)) - goto out; + return false; +} - if (signal_pending(current)) { - ret = -EINTR; - goto out; +/* + * The default wait condition for ring_buffer_wait() is to just to exit the + * wait loop the first time it is woken up. + */ +static bool rb_wait_once(void *data) +{ + long *once = data; + + /* wait_event() actually calls this twice before scheduling*/ + if (*once > 1) + return true; + + (*once)++; + return false; +} + +/** + * ring_buffer_wait - wait for input to the ring buffer + * @buffer: buffer to wait on + * @cpu: the cpu buffer to wait on + * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS + * + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon + * as data is added to any of the @buffer's cpu buffers. Otherwise + * it will wait for data to be added to a specific cpu buffer. + */ +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + struct wait_queue_head *waitq; + ring_buffer_cond_fn cond; + struct rb_irq_work *rbwork; + void *data; + long once = 0; + int ret = 0; + + cond = rb_wait_once; + data = &once; + + /* + * Depending on what the caller is waiting for, either any + * data in any cpu buffer, or a specific buffer, put the + * caller on the appropriate wait queue. + */ + if (cpu == RING_BUFFER_ALL_CPUS) { + rbwork = &buffer->irq_work; + /* Full only makes sense on per cpu reads */ + full = 0; + } else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -ENODEV; + cpu_buffer = buffer->buffers[cpu]; + rbwork = &cpu_buffer->irq_work; } - schedule(); - out: if (full) - finish_wait(&work->full_waiters, &wait); + waitq = &rbwork->full_waiters; else - finish_wait(&work->waiters, &wait); + waitq = &rbwork->waiters; - if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current)) - ret = -EINTR; + ret = wait_event_interruptible((*waitq), + rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } -- GitLab From a62168653774c36398d65846a98034436ee66d03 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 11 Mar 2024 12:38:29 -0400 Subject: [PATCH 1297/2290] soc: fsl: qbman: Always disable interrupts when taking cgr_lock [ Upstream commit 584c2a9184a33a40fceee838f856de3cffa19be3 ] smp_call_function_single disables IRQs when executing the callback. To prevent deadlocks, we must disable IRQs when taking cgr_lock elsewhere. This is already done by qman_update_cgr and qman_delete_cgr; fix the other lockers. Fixes: 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()") CC: stable@vger.kernel.org Signed-off-by: Sean Anderson Reviewed-by: Camelia Groza Tested-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 739e4eee6b75c..1bf1f1ea67f00 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1456,11 +1456,11 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock(&p->cgr_lock); + spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2440,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock(&p->cgr_lock); + spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2477,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } -- GitLab From d6b5aac451c9cc12e43ab7308e0e2ddc52c62c14 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 11 Mar 2024 12:38:30 -0400 Subject: [PATCH 1298/2290] soc: fsl: qbman: Use raw spinlock for cgr_lock [ Upstream commit fbec4e7fed89b579f2483041fabf9650fb0dd6bc ] smp_call_function always runs its callback in hard IRQ context, even on PREEMPT_RT, where spinlocks can sleep. So we need to use a raw spinlock for cgr_lock to ensure we aren't waiting on a sleeping task. Although this bug has existed for a while, it was not apparent until commit ef2a8d5478b9 ("net: dpaa: Adjust queue depth on rate change") which invokes smp_call_function_single via qman_update_cgr_safe every time a link goes up or down. Fixes: 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()") CC: stable@vger.kernel.org Reported-by: Vladimir Oltean Closes: https://lore.kernel.org/all/20230323153935.nofnjucqjqnz34ej@skbuf/ Reported-by: Steffen Trumtrar Closes: https://lore.kernel.org/linux-arm-kernel/87wmsyvclu.fsf@pengutronix.de/ Signed-off-by: Sean Anderson Reviewed-by: Camelia Groza Tested-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 1bf1f1ea67f00..7e9074519ad22 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -991,7 +991,7 @@ struct qman_portal { /* linked-list of CSCN handlers. */ struct list_head cgr_cbs; /* list lock */ - spinlock_t cgr_lock; + raw_spinlock_t cgr_lock; struct work_struct congestion_work; struct work_struct mr_work; char irqname[MAX_IRQNAME]; @@ -1281,7 +1281,7 @@ static int qman_create_portal(struct qman_portal *portal, /* if the given mask is NULL, assume all CGRs can be seen */ qman_cgrs_fill(&portal->cgrs[0]); INIT_LIST_HEAD(&portal->cgr_cbs); - spin_lock_init(&portal->cgr_lock); + raw_spin_lock_init(&portal->cgr_lock); INIT_WORK(&portal->congestion_work, qm_congestion_task); INIT_WORK(&portal->mr_work, qm_mr_process_task); portal->bits = 0; @@ -1456,11 +1456,14 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock_irq(&p->cgr_lock); + /* + * FIXME: QM_MCR_TIMEOUT is 10ms, which is too long for a raw spinlock! + */ + raw_spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1479,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2443,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock_irq(&p->cgr_lock); + raw_spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2480,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } @@ -2512,7 +2515,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) return -EINVAL; memset(&local_opts, 0, sizeof(struct qm_mcc_initcgr)); - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); list_del(&cgr->node); /* * If there are no other CGR objects for this CGRID in the list, @@ -2537,7 +2540,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) /* add back to the list */ list_add(&cgr->node, &p->cgr_cbs); release_lock: - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } @@ -2577,9 +2580,9 @@ static int qman_update_cgr(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts) if (!p) return -EINVAL; - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); ret = qm_modify_cgr(cgr, 0, opts); - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } -- GitLab From b7f6c3630eb3f103115ab0d7613588064f665d0d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 29 Feb 2024 15:20:09 +0100 Subject: [PATCH 1299/2290] s390/zcrypt: fix reference counting on zcrypt card objects [ Upstream commit 50ed48c80fecbe17218afed4f8bed005c802976c ] Tests with hot-plugging crytpo cards on KVM guests with debug kernel build revealed an use after free for the load field of the struct zcrypt_card. The reason was an incorrect reference handling of the zcrypt card object which could lead to a free of the zcrypt card object while it was still in use. This is an example of the slab message: kernel: 0x00000000885a7512-0x00000000885a7513 @offset=1298. First byte 0x68 instead of 0x6b kernel: Allocated in zcrypt_card_alloc+0x36/0x70 [zcrypt] age=18046 cpu=3 pid=43 kernel: kmalloc_trace+0x3f2/0x470 kernel: zcrypt_card_alloc+0x36/0x70 [zcrypt] kernel: zcrypt_cex4_card_probe+0x26/0x380 [zcrypt_cex4] kernel: ap_device_probe+0x15c/0x290 kernel: really_probe+0xd2/0x468 kernel: driver_probe_device+0x40/0xf0 kernel: __device_attach_driver+0xc0/0x140 kernel: bus_for_each_drv+0x8c/0xd0 kernel: __device_attach+0x114/0x198 kernel: bus_probe_device+0xb4/0xc8 kernel: device_add+0x4d2/0x6e0 kernel: ap_scan_adapter+0x3d0/0x7c0 kernel: ap_scan_bus+0x5a/0x3b0 kernel: ap_scan_bus_wq_callback+0x40/0x60 kernel: process_one_work+0x26e/0x620 kernel: worker_thread+0x21c/0x440 kernel: Freed in zcrypt_card_put+0x54/0x80 [zcrypt] age=9024 cpu=3 pid=43 kernel: kfree+0x37e/0x418 kernel: zcrypt_card_put+0x54/0x80 [zcrypt] kernel: ap_device_remove+0x4c/0xe0 kernel: device_release_driver_internal+0x1c4/0x270 kernel: bus_remove_device+0x100/0x188 kernel: device_del+0x164/0x3c0 kernel: device_unregister+0x30/0x90 kernel: ap_scan_adapter+0xc8/0x7c0 kernel: ap_scan_bus+0x5a/0x3b0 kernel: ap_scan_bus_wq_callback+0x40/0x60 kernel: process_one_work+0x26e/0x620 kernel: worker_thread+0x21c/0x440 kernel: kthread+0x150/0x168 kernel: __ret_from_fork+0x3c/0x58 kernel: ret_from_fork+0xa/0x30 kernel: Slab 0x00000372022169c0 objects=20 used=18 fp=0x00000000885a7c88 flags=0x3ffff00000000a00(workingset|slab|node=0|zone=1|lastcpupid=0x1ffff) kernel: Object 0x00000000885a74b8 @offset=1208 fp=0x00000000885a7c88 kernel: Redzone 00000000885a74b0: bb bb bb bb bb bb bb bb ........ kernel: Object 00000000885a74b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a74f8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk kernel: Object 00000000885a7508: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 68 4b 6b 6b 6b a5 kkkkkkkkkkhKkkk. kernel: Redzone 00000000885a7518: bb bb bb bb bb bb bb bb ........ kernel: Padding 00000000885a756c: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZ kernel: CPU: 0 PID: 387 Comm: systemd-udevd Not tainted 6.8.0-HF #2 kernel: Hardware name: IBM 3931 A01 704 (KVM/Linux) kernel: Call Trace: kernel: [<00000000ca5ab5b8>] dump_stack_lvl+0x90/0x120 kernel: [<00000000c99d78bc>] check_bytes_and_report+0x114/0x140 kernel: [<00000000c99d53cc>] check_object+0x334/0x3f8 kernel: [<00000000c99d820c>] alloc_debug_processing+0xc4/0x1f8 kernel: [<00000000c99d852e>] get_partial_node.part.0+0x1ee/0x3e0 kernel: [<00000000c99d94ec>] ___slab_alloc+0xaf4/0x13c8 kernel: [<00000000c99d9e38>] __slab_alloc.constprop.0+0x78/0xb8 kernel: [<00000000c99dc8dc>] __kmalloc+0x434/0x590 kernel: [<00000000c9b4c0ce>] ext4_htree_store_dirent+0x4e/0x1c0 kernel: [<00000000c9b908a2>] htree_dirblock_to_tree+0x17a/0x3f0 kernel: [<00000000c9b919dc>] ext4_htree_fill_tree+0x134/0x400 kernel: [<00000000c9b4b3d0>] ext4_dx_readdir+0x160/0x2f0 kernel: [<00000000c9b4bedc>] ext4_readdir+0x5f4/0x760 kernel: [<00000000c9a7efc4>] iterate_dir+0xb4/0x280 kernel: [<00000000c9a7f1ea>] __do_sys_getdents64+0x5a/0x120 kernel: [<00000000ca5d6946>] __do_syscall+0x256/0x310 kernel: [<00000000ca5eea10>] system_call+0x70/0x98 kernel: INFO: lockdep is turned off. kernel: FIX kmalloc-96: Restoring Poison 0x00000000885a7512-0x00000000885a7513=0x6b kernel: FIX kmalloc-96: Marking all objects used The fix is simple: Before use of the queue not only the queue object but also the card object needs to increase it's reference count with a call to zcrypt_card_get(). Similar after use of the queue not only the queue but also the card object's reference count is decreased with zcrypt_card_put(). Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- drivers/s390/crypto/zcrypt_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 28e34d155334b..6f44963d34bbf 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -617,6 +617,7 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, { if (!zq || !try_module_get(zq->queue->ap_dev.device.driver->owner)) return NULL; + zcrypt_card_get(zc); zcrypt_queue_get(zq); get_device(&zq->queue->ap_dev.device); atomic_add(weight, &zc->load); @@ -636,6 +637,7 @@ static inline void zcrypt_drop_queue(struct zcrypt_card *zc, atomic_sub(weight, &zq->load); put_device(&zq->queue->ap_dev.device); zcrypt_queue_put(zq); + zcrypt_card_put(zc); module_put(mod); } -- GitLab From cb676955d1acb065b06d92219d1b1fb8231823fe Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:39 +0200 Subject: [PATCH 1300/2290] drm/probe-helper: warn about negative .get_modes() [ Upstream commit 7af03e688792293ba33149fb8df619a8dff90e80 ] The .get_modes() callback is supposed to return the number of modes, never a negative error code. If a negative value is returned, it'll just be interpreted as a negative count, and added to previous calculations. Document the rules, but handle the negative values gracefully with an error message. Cc: stable@vger.kernel.org Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/50208c866facc33226a3c77b82bb96aeef8ef310.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_probe_helper.c | 7 +++++++ include/drm/drm_modeset_helper_vtables.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 3b968ad187cf3..52dbaf74fe164 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -362,6 +362,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector) count = connector_funcs->get_modes(connector); + /* The .get_modes() callback should not return negative values. */ + if (count < 0) { + drm_err(connector->dev, ".get_modes() returned %pe\n", + ERR_PTR(count)); + count = 0; + } + /* * Fallback for when DDC probe failed in drm_get_edid() and thus skipped * override/firmware EDID. diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index fafa70ac1337f..6f19cf5c210e5 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -896,7 +896,8 @@ struct drm_connector_helper_funcs { * * RETURNS: * - * The number of modes added by calling drm_mode_probed_add(). + * The number of modes added by calling drm_mode_probed_add(). Return 0 + * on failures (no modes) instead of negative error codes. */ int (*get_modes)(struct drm_connector *connector); -- GitLab From 191df89d8fb0a6b5b4165912ff6b6c7a8138323c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:40 +0200 Subject: [PATCH 1301/2290] drm/panel: do not return negative error codes from drm_panel_get_modes() [ Upstream commit fc4e97726530241d96dd7db72eb65979217422c9 ] None of the callers of drm_panel_get_modes() expect it to return negative error codes. Either they propagate the return value in their struct drm_connector_helper_funcs .get_modes() hook (which is also not supposed to return negative codes), or add it to other counts leading to bogus values. On the other hand, many of the struct drm_panel_funcs .get_modes() hooks do return negative error codes, so handle them gracefully instead of propagating further. Return 0 for no modes, whatever the reason. Cc: Neil Armstrong Cc: Jessica Zhang Cc: Sam Ravnborg Cc: stable@vger.kernel.org Reviewed-by: Neil Armstrong Reviewed-by: Jessica Zhang Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/79f559b72d8c493940417304e222a4b04dfa19c4.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index f634371c717a8..7fd3de89ed079 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -207,19 +207,24 @@ EXPORT_SYMBOL(drm_panel_disable); * The modes probed from the panel are automatically added to the connector * that the panel is attached to. * - * Return: The number of modes available from the panel on success or a - * negative error code on failure. + * Return: The number of modes available from the panel on success, or 0 on + * failure (no modes). */ int drm_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { if (!panel) - return -EINVAL; + return 0; - if (panel->funcs && panel->funcs->get_modes) - return panel->funcs->get_modes(panel, connector); + if (panel->funcs && panel->funcs->get_modes) { + int num; - return -EOPNOTSUPP; + num = panel->funcs->get_modes(panel, connector); + if (num > 0) + return num; + } + + return 0; } EXPORT_SYMBOL(drm_panel_get_modes); -- GitLab From 8f914db6fe252c5e78a9b8b03adc1b0a33aec25d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:41 +0200 Subject: [PATCH 1302/2290] drm/exynos: do not return negative values from .get_modes() [ Upstream commit 13d5b040363c7ec0ac29c2de9cf661a24a8aa531 ] The .get_modes() hooks aren't supposed to return negative error codes. Return 0 for no modes, whatever the reason. Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: stable@vger.kernel.org Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/d8665f620d9c252aa7d5a4811ff6b16e773903a2.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 4 ++-- drivers/gpu/drm/exynos/exynos_hdmi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index f5e1adfcaa514..fb941a8c99f0f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -316,14 +316,14 @@ static int vidi_get_modes(struct drm_connector *connector) */ if (!ctx->raw_edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "raw_edid is null.\n"); - return -EFAULT; + return 0; } edid_len = (1 + ctx->raw_edid->extensions) * EDID_LENGTH; edid = kmemdup(ctx->raw_edid, edid_len, GFP_KERNEL); if (!edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "failed to allocate edid\n"); - return -ENOMEM; + return 0; } drm_connector_update_edid_property(connector, edid); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 1a7194a653ae5..be2d9cbaaef2e 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) int ret; if (!hdata->ddc_adpt) - return -ENODEV; + return 0; edid = drm_get_edid(connector, hdata->ddc_adpt); if (!edid) - return -ENODEV; + return 0; hdata->dvi_mode = !connector->display_info.is_hdmi; DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", -- GitLab From ae696b7c00ef30285228fb2b001f59bed711636e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:43 +0200 Subject: [PATCH 1303/2290] drm/imx/ipuv3: do not return negative values from .get_modes() [ Upstream commit c2da9ada64962fcd2e6395ed9987b9874ea032d3 ] The .get_modes() hooks aren't supposed to return negative error codes. Return 0 for no modes, whatever the reason. Cc: Philipp Zabel Cc: stable@vger.kernel.org Acked-by: Philipp Zabel Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/311f6eec96d47949b16a670529f4d89fcd97aefa.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/parallel-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 06723b2e9b847..64b6bc2de873e 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -72,14 +72,14 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) int ret; if (!mode) - return -EINVAL; + return 0; ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); if (ret) { drm_mode_destroy(connector->dev, mode); - return ret; + return 0; } drm_mode_copy(mode, &imxpd->mode); -- GitLab From fcf7345280af593ebd41c58a3ad0675acdad5119 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Mar 2024 18:03:44 +0200 Subject: [PATCH 1304/2290] drm/vc4: hdmi: do not return negative values from .get_modes() [ Upstream commit abf493988e380f25242c1023275c68bd3579c9ce ] The .get_modes() hooks aren't supposed to return negative error codes. Return 0 for no modes, whatever the reason. Cc: Maxime Ripard Cc: stable@vger.kernel.org Acked-by: Maxime Ripard Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/dcda6d4003e2c6192987916b35c7304732800e08.1709913674.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index ea2eaf6032caa..f696818913499 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -497,7 +497,7 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) edid = drm_get_edid(connector, vc4_hdmi->ddc); cec_s_phys_addr_from_edid(vc4_hdmi->cec_adap, edid); if (!edid) - return -ENODEV; + return 0; drm_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); -- GitLab From 50e9f82b1c1dc125c919d2f0d6a2cb77efe75fbb Mon Sep 17 00:00:00 2001 From: Qiang Zhang Date: Tue, 12 Mar 2024 16:04:23 +0800 Subject: [PATCH 1305/2290] memtest: use {READ,WRITE}_ONCE in memory scanning [ Upstream commit 82634d7e24271698e50a3ec811e5f50de790a65f ] memtest failed to find bad memory when compiled with clang. So use {WRITE,READ}_ONCE to access memory to avoid compiler over optimization. Link: https://lkml.kernel.org/r/20240312080422.691222-1-qiang4.zhang@intel.com Signed-off-by: Qiang Zhang Cc: Bill Wendling Cc: Justin Stitt Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/memtest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memtest.c b/mm/memtest.c index f53ace709ccd8..d407373f225b4 100644 --- a/mm/memtest.c +++ b/mm/memtest.c @@ -46,10 +46,10 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size last_bad = 0; for (p = start; p < end; p++) - *p = pattern; + WRITE_ONCE(*p, pattern); for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) + if (READ_ONCE(*p) == pattern) continue; if (start_phys_aligned == last_bad + incr) { last_bad += incr; -- GitLab From af4b1a5aa00bd947bce1d7074a9b899a53bc37dd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 Mar 2024 14:42:18 -0700 Subject: [PATCH 1306/2290] Revert "block/mq-deadline: use correct way to throttling write requests" [ Upstream commit 256aab46e31683d76d45ccbedc287b4d3f3e322b ] The code "max(1U, 3 * (1U << shift) / 4)" comes from the Kyber I/O scheduler. The Kyber I/O scheduler maintains one internal queue per hwq and hence derives its async_depth from the number of hwq tags. Using this approach for the mq-deadline scheduler is wrong since the mq-deadline scheduler maintains one internal queue for all hwqs combined. Hence this revert. Cc: stable@vger.kernel.org Cc: Damien Le Moal Cc: Harshit Mogalapalli Cc: Zhiguo Niu Fixes: d47f9717e5cf ("block/mq-deadline: use correct way to throttling write requests") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240313214218.1736147-1-bvanassche@acm.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/mq-deadline.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 55e26065c2e27..f10c2a0d18d41 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -622,9 +622,8 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; - unsigned int shift = tags->bitmap_tags.sb.shift; - dd->async_depth = max(1U, 3 * (1U << shift) / 4); + dd->async_depth = max(1UL, 3 * q->nr_requests / 4); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); } -- GitLab From 1f5124c74aaafdb6b20adc8d479d4c7cd225eb70 Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 13 Mar 2024 20:26:19 +0900 Subject: [PATCH 1307/2290] f2fs: mark inode dirty for FI_ATOMIC_COMMITTED flag [ Upstream commit 4bf78322346f6320313683dc9464e5423423ad5c ] In f2fs_update_inode, i_size of the atomic file isn't updated until FI_ATOMIC_COMMITTED flag is set. When committing atomic write right after the writeback of the inode, i_size of the raw inode will not be updated. It can cause the atomicity corruption due to a mismatch between old file size and new data. To prevent the problem, let's mark inode dirty for FI_ATOMIC_COMMITTED Atomic write thread Writeback thread __writeback_single_inode write_inode f2fs_update_inode - skip i_size update f2fs_ioc_commit_atomic_write f2fs_commit_atomic_write set_inode_flag(inode, FI_ATOMIC_COMMITTED) f2fs_do_sync_file f2fs_fsync_node_pages - skip f2fs_update_inode since the inode is clean Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5ae1c4aa3ae92..b54d681c6457d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3000,6 +3000,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: + case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } -- GitLab From f5bc133ef62e0a355249e9c40657832ef76734fe Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 13 Mar 2024 20:26:20 +0900 Subject: [PATCH 1308/2290] f2fs: truncate page cache before clearing flags when aborting atomic write [ Upstream commit 74b0ebcbdde4c7fe23c979e4cfc2fdbf349c39a3 ] In f2fs_do_write_data_page, FI_ATOMIC_FILE flag selects the target inode between the original inode and COW inode. When aborting atomic write and writeback occur simultaneously, invalid data can be written to original inode if the FI_ATOMIC_FILE flag is cleared meanwhile. To prevent the problem, let's truncate all pages before clearing the flag Atomic write thread Writeback thread f2fs_abort_atomic_write clear_inode_flag(inode, FI_ATOMIC_FILE) __writeback_single_inode do_writepages f2fs_do_write_data_page - use dn of original inode truncate_inode_pages_final Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aa1ba2fdfe00d..205216c1db91f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,6 +192,9 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; + if (clean) + truncate_inode_pages_final(inode->i_mapping); + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -200,7 +203,6 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) F2FS_I(inode)->atomic_write_task = NULL; if (clean) { - truncate_inode_pages_final(inode->i_mapping); f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } -- GitLab From 46b832e09d43b394ac0f6d9485d2b1a06593f0b7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 13 Mar 2024 19:58:26 +0900 Subject: [PATCH 1309/2290] nilfs2: fix failure to detect DAT corruption in btree and direct mappings [ Upstream commit f2f26b4a84a0ef41791bd2d70861c8eac748f4ba ] Patch series "nilfs2: fix kernel bug at submit_bh_wbc()". This resolves a kernel BUG reported by syzbot. Since there are two flaws involved, I've made each one a separate patch. The first patch alone resolves the syzbot-reported bug, but I think both fixes should be sent to stable, so I've tagged them as such. This patch (of 2): Syzbot has reported a kernel bug in submit_bh_wbc() when writing file data to a nilfs2 file system whose metadata is corrupted. There are two flaws involved in this issue. The first flaw is that when nilfs_get_block() locates a data block using btree or direct mapping, if the disk address translation routine nilfs_dat_translate() fails with internal code -ENOENT due to DAT metadata corruption, it can be passed back to nilfs_get_block(). This causes nilfs_get_block() to misidentify an existing block as non-existent, causing both data block lookup and insertion to fail inconsistently. The second flaw is that nilfs_get_block() returns a successful status in this inconsistent state. This causes the caller __block_write_begin_int() or others to request a read even though the buffer is not mapped, resulting in a BUG_ON check for the BH_Mapped flag in submit_bh_wbc() failing. This fixes the first issue by changing the return value to code -EINVAL when a conversion using DAT fails with code -ENOENT, avoiding the conflicting condition that leads to the kernel bug described above. Here, code -EINVAL indicates that metadata corruption was detected during the block lookup, which will be properly handled as a file system error and converted to -EIO when passing through the nilfs2 bmap layer. Link: https://lkml.kernel.org/r/20240313105827.5296-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240313105827.5296-2-konishi.ryusuke@gmail.com Fixes: c3a7abf06ce7 ("nilfs2: support contiguous lookup of blocks") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+cfed5b56649bddf80d6e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=cfed5b56649bddf80d6e Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/btree.c | 9 +++++++-- fs/nilfs2/direct.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 40ce92a332fe7..146640f0607a3 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr = blocknr; } cnt = 1; @@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) @@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, out: nilfs_btree_free_path(path); return ret; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index a35f2795b2422..8f802f7b0840b 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr = blocknr; } @@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt) @@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, } *ptrp = ptr; return cnt; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + return ret; } static __u64 -- GitLab From 192e9f9078c96be30b31c4b44d6294b24520fce5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 13 Mar 2024 19:58:27 +0900 Subject: [PATCH 1310/2290] nilfs2: prevent kernel bug at submit_bh_wbc() [ Upstream commit 269cdf353b5bdd15f1a079671b0f889113865f20 ] Fix a bug where nilfs_get_block() returns a successful status when searching and inserting the specified block both fail inconsistently. If this inconsistent behavior is not due to a previously fixed bug, then an unexpected race is occurring, so return a temporary error -EAGAIN instead. This prevents callers such as __block_write_begin_int() from requesting a read into a buffer that is not mapped, which would cause the BUG_ON check for the BH_Mapped flag in submit_bh_wbc() to fail. Link: https://lkml.kernel.org/r/20240313105827.5296-3-konishi.ryusuke@gmail.com Fixes: 1f5abe7e7dbc ("nilfs2: replace BUG_ON and BUG calls triggerable from ioctl") Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index f625872321cca..8eb4288d46fe0 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -112,7 +112,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", __func__, inode->i_ino, (unsigned long long)blkoff); - err = 0; + err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; -- GitLab From bbec4e4d84c3a9005ccacc520837c4f650965654 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Fri, 1 Mar 2024 17:53:44 +0300 Subject: [PATCH 1311/2290] cifs: open_cached_dir(): add FILE_READ_EA to desired access [ Upstream commit f1b8224b4e6ed59e7e6f5c548673c67410098d8d ] Since smb2_query_eas() reads EA and uses cached directory, open_cached_dir() should request FILE_READ_EA access. Otherwise listxattr() and getxattr() will fail with EACCES (0xc0000022 STATUS_ACCESS_DENIED SMB status). Link: https://bugzilla.kernel.org/show_bug.cgi?id=218543 Cc: stable@vger.kernel.org Signed-off-by: Eugene Korenevsky Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cached_dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fd082151c5f9b..86fe433b1d324 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -218,7 +218,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, .tcon = tcon, .path = path, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE), - .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES, + .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES | + FILE_READ_EA, .disposition = FILE_OPEN, .fid = pfid, }; -- GitLab From 9f23176ad72ff28adadd638fd1236419081ebc63 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Mar 2024 13:54:57 +0100 Subject: [PATCH 1312/2290] cpufreq: dt: always allocate zeroed cpumask [ Upstream commit d2399501c2c081eac703ca9597ceb83c7875a537 ] Commit 0499a78369ad ("ARM64: Dynamically allocate cpumasks and increase supported CPUs to 512") changed the handling of cpumasks on ARM 64bit, what resulted in the strange issues and warnings during cpufreq-dt initialization on some big.LITTLE platforms. This was caused by mixing OPPs between big and LITTLE cores, because OPP-sharing information between big and LITTLE cores is computed on cpumask, which in turn was not zeroed on allocation. Fix this by switching to zalloc_cpumask_var() call. Fixes: dc279ac6e5b4 ("cpufreq: dt: Refactor initialization to handle probe deferral properly") CC: stable@vger.kernel.org # v5.10+ Signed-off-by: Marek Szyprowski Reviewed-by: Christoph Lameter (Ampere) Reviewed-by: Dhruva Gole Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq-dt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 4aec4b2a52259..8f8f1949d66f6 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -208,7 +208,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) if (!priv) return -ENOMEM; - if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) return -ENOMEM; cpumask_set_cpu(cpu, priv->cpus); -- GitLab From efd67e570ae77abf674a3d5e69192928f1f61e55 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 15 Mar 2024 22:42:27 +0100 Subject: [PATCH 1313/2290] x86/CPU/AMD: Update the Zenbleed microcode revisions [ Upstream commit 5c84b051bd4e777cf37aaff983277e58c99618d5 ] Update them to the correct revision numbers. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Signed-off-by: Borislav Petkov (AMD) Cc: Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/amd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c1d09c8844d67..425092806f8fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -997,11 +997,11 @@ static bool cpu_has_zenbleed_microcode(void) u32 good_rev = 0; switch (boot_cpu_data.x86_model) { - case 0x30 ... 0x3f: good_rev = 0x0830107a; break; - case 0x60 ... 0x67: good_rev = 0x0860010b; break; - case 0x68 ... 0x6f: good_rev = 0x08608105; break; - case 0x70 ... 0x7f: good_rev = 0x08701032; break; - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + case 0x30 ... 0x3f: good_rev = 0x0830107b; break; + case 0x60 ... 0x67: good_rev = 0x0860010c; break; + case 0x68 ... 0x6f: good_rev = 0x08608107; break; + case 0x70 ... 0x7f: good_rev = 0x08701033; break; + case 0xa0 ... 0xaf: good_rev = 0x08a00009; break; default: return false; -- GitLab From 6bcf9d2cade110ab6a18f3f39210243a053df5a7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 22 Feb 2024 12:28:28 -0500 Subject: [PATCH 1314/2290] NFSD: Fix nfsd_clid_class use of __string_len() macro [ Upstream commit 9388a2aa453321bcf1ad2603959debea9e6ab6d4 ] I'm working on restructuring the __string* macros so that it doesn't need to recalculate the string twice. That is, it will save it off when processing __string() and the __assign_str() will not need to do the work again as it currently does. Currently __string_len(item, src, len) doesn't actually use "src", but my changes will require src to be correct as that is where the __assign_str() will get its value from. The event class nfsd_clid_class has: __string_len(name, name, clp->cl_name.len) But the second "name" does not exist and causes my changes to fail to build. That second parameter should be: clp->cl_name.data. Link: https://lore.kernel.org/linux-trace-kernel/20240222122828.3d8d213c@gandalf.local.home Cc: Neil Brown Cc: Olga Kornievskaia Cc: Dai Ngo Cc: Tom Talpey Cc: stable@vger.kernel.org Fixes: d27b74a8675ca ("NFSD: Use new __string_len C macros for nfsd_clid_class") Acked-by: Chuck Lever Acked-by: Jeff Layton Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- fs/nfsd/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 4183819ea0829..84f26f281fe9f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -842,7 +842,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __array(unsigned char, addr, sizeof(struct sockaddr_in6)) __field(unsigned long, flavor) __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - __string_len(name, name, clp->cl_name.len) + __string_len(name, clp->cl_name.data, clp->cl_name.len) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; -- GitLab From 0e21852b888d0f040dab07122e70f6f297f77340 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 13 Mar 2024 09:34:54 -0400 Subject: [PATCH 1315/2290] net: hns3: tracing: fix hclgevf trace event strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f9952e8d80cca2da3b47ecd5ad9ec16cfd1a649 ] The __string() and __assign_str() helper macros of the TRACE_EVENT() macro are going through some optimizations where only the source string of __string() will be used and the __assign_str() source will be ignored and later removed. To make sure that there's no issues, a new check is added between the __string() src argument and the __assign_str() src argument that does a strcmp() to make sure they are the same string. The hclgevf trace events have: __assign_str(devname, &hdev->nic.kinfo.netdev->name); Which triggers the warning: hclgevf_trace.h:34:39: error: passing argument 1 of ‘strcmp’ from incompatible pointer type [-Werror=incompatible-pointer-types] 34 | __assign_str(devname, &hdev->nic.kinfo.netdev->name); [..] arch/x86/include/asm/string_64.h:75:24: note: expected ‘const char *’ but argument is of type ‘char (*)[16]’ 75 | int strcmp(const char *cs, const char *ct); | ~~~~~~~~~~~~^~ Because __assign_str() now has: WARN_ON_ONCE(__builtin_constant_p(src) ? \ strcmp((src), __data_offsets.dst##_ptr_) : \ (src) != __data_offsets.dst##_ptr_); \ The problem is the '&' on hdev->nic.kinfo.netdev->name. That's because that name is: char name[IFNAMSIZ] Where passing an address '&' of a char array is not compatible with strcmp(). The '&' is not necessary, remove it. Link: https://lore.kernel.org/linux-trace-kernel/20240313093454.3909afe7@gandalf.local.home Cc: netdev Cc: Yisen Zhuang Cc: Salil Mehta Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Yufeng Mo Cc: Huazhong Tan Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Reviewed-by: Jijie Shao Fixes: d8355240cf8fb ("net: hns3: add trace event support for PF/VF mailbox") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h | 8 ++++---- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h index 8510b88d49820..f3cd5a376eca9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h @@ -24,7 +24,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_GET_MBX_LEN) ), @@ -33,7 +33,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), @@ -56,7 +56,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h index 5d4895bb57a17..b259e95dd53c2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h @@ -23,7 +23,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_GET_MBX_LEN) ), @@ -31,7 +31,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), @@ -55,7 +55,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), -- GitLab From 77eae2d257d63c291090b96b0dadcda00115eeb6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: [PATCH 1316/2290] LoongArch: Change __my_cpu_offset definition to avoid mis-optimization [ Upstream commit c87e12e0e8c1241410e758e181ca6bf23efa5b5b ] From GCC commit 3f13154553f8546a ("df-scan: remove ad-hoc handling of global regs in asms"), global registers will no longer be forced to add to the def-use chain. Then current_thread_info(), current_stack_pointer and __my_cpu_offset may be lifted out of the loop because they are no longer treated as "volatile variables". This optimization is still correct for the current_thread_info() and current_stack_pointer usages because they are associated to a thread. However it is wrong for __my_cpu_offset because it is associated to a CPU rather than a thread: if the thread migrates to a different CPU in the loop, __my_cpu_offset should be changed. Change __my_cpu_offset definition to treat it as a "volatile variable", in order to avoid such a mis-optimization. Cc: stable@vger.kernel.org Reported-by: Xiaotian Wu Reported-by: Miao Wang Signed-off-by: Xing Li Signed-off-by: Hongchen Zhang Signed-off-by: Rui Wang Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/percpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 302f0e33975a2..c90c560941685 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -25,7 +25,12 @@ static inline void set_my_cpu_offset(unsigned long off) __my_cpu_offset = off; csr_write64(off, PERCPU_BASE_KS); } -#define __my_cpu_offset __my_cpu_offset + +#define __my_cpu_offset \ +({ \ + __asm__ __volatile__("":"+r"(__my_cpu_offset)); \ + __my_cpu_offset; \ +}) #define PERCPU_OP(op, asm_op, c_op) \ static __always_inline unsigned long __percpu_##op(void *ptr, \ -- GitLab From 97cd43ba824aec764f5ea2790d0c0a318f885167 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: [PATCH 1317/2290] LoongArch: Define the __io_aw() hook as mmiowb() [ Upstream commit 9c68ece8b2a5c5ff9b2fcaea923dd73efeb174cd ] Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of mmiowb()") remove all mmiowb() in drivers, but it says: "NOTE: mmiowb() has only ever guaranteed ordering in conjunction with spin_unlock(). However, pairing each mmiowb() removal in this patch with the corresponding call to spin_unlock() is not at all trivial, so there is a small chance that this change may regress any drivers incorrectly relying on mmiowb() to order MMIO writes between CPUs using lock-free synchronisation." The mmio in radeon_ring_commit() is protected by a mutex rather than a spinlock, but in the mutex fastpath it behaves similar to spinlock. We can add mmiowb() calls in the radeon driver but the maintainer says he doesn't like such a workaround, and radeon is not the only example of mutex protected mmio. So we should extend the mmiowb tracking system from spinlock to mutex, and maybe other locking primitives. This is not easy and error prone, so we solve it in the architectural code, by simply defining the __io_aw() hook as mmiowb(). And we no longer need to override queued_spin_unlock() so use the generic definition. Without this, we get such an error when run 'glxgears' on weak ordering architectures such as LoongArch: radeon 0000:04:00.0: ring 0 stalled for more than 10324msec radeon 0000:04:00.0: ring 3 stalled for more than 10240msec radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3) radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.com/T/#t Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loongson.cn/T/#t Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 402a7d9e3a53e..427d147f30d7f 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -72,6 +72,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) #define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) +#define __io_aw() mmiowb() + #include #define ARCH_HAS_VALID_PHYS_ADDR_RANGE -- GitLab From b7cea3a9af0853fdbb1b16633a458f991dde6aac Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 14 Mar 2024 16:49:09 -0600 Subject: [PATCH 1318/2290] wireguard: netlink: check for dangling peer via is_dead instead of empty list [ Upstream commit 55b6c738673871c9b0edae05d0c97995c1ff08c4 ] If all peers are removed via wg_peer_remove_all(), rather than setting peer_list to empty, the peer is added to a temporary list with a head on the stack of wg_peer_remove_all(). If a netlink dump is resumed and the cursored peer is one that has been removed via wg_peer_remove_all(), it will iterate from that peer and then attempt to dump freed peers. Fix this by instead checking peer->is_dead, which was explictly created for this purpose. Also move up the device_update_lock lockdep assertion, since reading is_dead relies on that. It can be reproduced by a small script like: echo "Setting config..." ip link add dev wg0 type wireguard wg setconf wg0 /big-config ( while true; do echo "Showing config..." wg showconf wg0 > /dev/null done ) & sleep 4 wg setconf wg0 <(printf "[Peer]\nPublicKey=$(wg genkey)\n") Resulting in: BUG: KASAN: slab-use-after-free in __lock_acquire+0x182a/0x1b20 Read of size 8 at addr ffff88811956ec70 by task wg/59 CPU: 2 PID: 59 Comm: wg Not tainted 6.8.0-rc2-debug+ #5 Call Trace: dump_stack_lvl+0x47/0x70 print_address_description.constprop.0+0x2c/0x380 print_report+0xab/0x250 kasan_report+0xba/0xf0 __lock_acquire+0x182a/0x1b20 lock_acquire+0x191/0x4b0 down_read+0x80/0x440 get_peer+0x140/0xcb0 wg_get_device_dump+0x471/0x1130 Cc: stable@vger.kernel.org Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Reported-by: Lillian Berry Signed-off-by: Jason A. Donenfeld Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wireguard/netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 6d1bd9f52d02a..81eef56773a23 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -255,17 +255,17 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!peers_nest) goto out; ret = 0; - /* If the last cursor was removed via list_del_init in peer_remove, then + lockdep_assert_held(&wg->device_update_lock); + /* If the last cursor was removed in peer_remove or peer_remove_all, then * we just treat this the same as there being no more peers left. The * reason is that seq_nr should indicate to userspace that this isn't a * coherent dump anyway, so they'll try again. */ if (list_empty(&wg->peer_list) || - (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + (ctx->next_peer && ctx->next_peer->is_dead)) { nla_nest_cancel(skb, peers_nest); goto out; } - lockdep_assert_held(&wg->device_update_lock); peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { if (get_peer(peer, skb, ctx)) { -- GitLab From 09c3fa70f65175861ca948cb2f0f791e666c90e5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 14 Mar 2024 16:49:10 -0600 Subject: [PATCH 1319/2290] wireguard: netlink: access device through ctx instead of peer [ Upstream commit 71cbd32e3db82ea4a74e3ef9aeeaa6971969c86f ] The previous commit fixed a bug that led to a NULL peer->device being dereferenced. It's actually easier and faster performance-wise to instead get the device from ctx->wg. This semantically makes more sense too, since ctx->wg->peer_allowedips.seq is compared with ctx->allowedips_seq, basing them both in ctx. This also acts as a defence in depth provision against freed peers. Cc: stable@vger.kernel.org Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wireguard/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 81eef56773a23..81b716e6612e2 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -164,8 +164,8 @@ get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) if (!allowedips_node) goto no_allowedips; if (!ctx->allowedips_seq) - ctx->allowedips_seq = peer->device->peer_allowedips.seq; - else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + ctx->allowedips_seq = ctx->wg->peer_allowedips.seq; + else if (ctx->allowedips_seq != ctx->wg->peer_allowedips.seq) goto no_allowedips; allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); -- GitLab From 25bc986570219f98ded2bf9b335191c80948508b Mon Sep 17 00:00:00 2001 From: "Andrey Jr. Melnikov" Date: Wed, 14 Feb 2024 17:57:57 +0100 Subject: [PATCH 1320/2290] ahci: asm1064: correct count of reported ports [ Upstream commit 9815e39617541ef52d0dfac4be274ad378c6dc09 ] The ASM1064 SATA host controller always reports wrongly, that it has 24 ports. But in reality, it only has four ports. before: ahci 0000:04:00.0: SSS flag set, parallel bus scan disabled ahci 0000:04:00.0: AHCI 0001.0301 32 slots 24 ports 6 Gbps 0xffff0f impl SATA mode ahci 0000:04:00.0: flags: 64bit ncq sntf stag pm led only pio sxs deso sadm sds apst after: ahci 0000:04:00.0: ASM1064 has only four ports ahci 0000:04:00.0: forcing port_map 0xffff0f -> 0xf ahci 0000:04:00.0: SSS flag set, parallel bus scan disabled ahci 0000:04:00.0: AHCI 0001.0301 32 slots 24 ports 6 Gbps 0xf impl SATA mode ahci 0000:04:00.0: flags: 64bit ncq sntf stag pm led only pio sxs deso sadm sds apst Signed-off-by: "Andrey Jr. Melnikov" Signed-off-by: Niklas Cassel Stable-dep-of: 6cd8adc3e189 ("ahci: asm1064: asm1166: don't limit reported ports") Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 1790a2ecb9fac..9de1731b6b444 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -671,9 +671,17 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - hpriv->saved_port_map = 0x3f; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) { + switch (pdev->device) { + case 0x1166: + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + break; + case 0x1064: + dev_info(&pdev->dev, "ASM1064 has only four ports\n"); + hpriv->saved_port_map = 0xf; + break; + } } if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { -- GitLab From 139845240d2731733a4f548e8da2c61d7d708bf5 Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Wed, 13 Mar 2024 22:46:50 +0100 Subject: [PATCH 1321/2290] ahci: asm1064: asm1166: don't limit reported ports [ Upstream commit 6cd8adc3e18960f6e59d797285ed34ef473cc896 ] Previously, patches have been added to limit the reported count of SATA ports for asm1064 and asm1166 SATA controllers, as those controllers do report more ports than physically having. While it is allowed to report more ports than physically having in CAP.NP, it is not allowed to report more ports than physically having in the PI (Ports Implemented) register, which is what these HBAs do. (This is a AHCI spec violation.) Unfortunately, it seems that the PMP implementation in these ASMedia HBAs is also violating the AHCI and SATA-IO PMP specification. What these HBAs do is that they do not report that they support PMP (CAP.SPM (Supports Port Multiplier) is not set). Instead, they have decided to add extra "virtual" ports in the PI register that is used if a port multiplier is connected to any of the physical ports of the HBA. Enumerating the devices behind the PMP as specified in the AHCI and SATA-IO specifications, by using PMP READ and PMP WRITE commands to the physical ports of the HBA is not possible, you have to use the "virtual" ports. This is of course bad, because this gives us no way to detect the device and vendor ID of the PMP actually connected to the HBA, which means that we can not apply the proper PMP quirks for the PMP that is connected to the HBA. Limiting the port map will thus stop these controllers from working with SATA Port Multipliers. This patch reverts both patches for asm1064 and asm1166, so old behavior is restored and SATA PMP will work again, but it will also reintroduce the (minutes long) extra boot time for the ASMedia controllers that do not have a PMP connected (either on the PCIe card itself, or an external PMP). However, a longer boot time for some, is the lesser evil compared to some other users not being able to detect their drives at all. Fixes: 0077a504e1a4 ("ahci: asm1166: correct count of reported ports") Fixes: 9815e3961754 ("ahci: asm1064: correct count of reported ports") Cc: stable@vger.kernel.org Reported-by: Matt Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede [cassel: rewrote commit message] Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 9de1731b6b444..17119e8dc8c30 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -671,19 +671,6 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) { - switch (pdev->device) { - case 0x1166: - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - hpriv->saved_port_map = 0x3f; - break; - case 0x1064: - dev_info(&pdev->dev, "ASM1064 has only four ports\n"); - hpriv->saved_port_map = 0xf; - break; - } - } - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; -- GitLab From 589c414138a1bed98e652c905937d8f790804efe Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 11 Mar 2024 18:07:34 -0400 Subject: [PATCH 1322/2290] drm/amdgpu: amdgpu_ttm_gart_bind set gtt bound flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6c6064cbe58b43533e3451ad6a8ba9736c109ac3 ] Otherwise after the GTT bo is released, the GTT and gart space is freed but amdgpu_ttm_backend_unbind will not clear the gart page table entry and leave valid mapping entry pointing to the stale system page. Then if GPU access the gart address mistakely, it will read undefined value instead page fault, harder to debug and reproduce the real issue. Cc: stable@vger.kernel.org Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 158b791883f03..dfb9d42007730 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -838,6 +838,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); } + gtt->bound = true; } /* -- GitLab From 0581a4dac6616b80712f6524efcd4ddaf6ca1942 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 14 Feb 2024 13:29:51 -0700 Subject: [PATCH 1323/2290] drm/amd/display: Return the correct HDCP error code [ Upstream commit e64b3f55e458ce7e2087a0051f47edabf74545e7 ] [WHY & HOW] If the display is null when creating an HDCP session, return a proper error code. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index ee67a35c2a8ed..ff930a71e496a 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -513,6 +513,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp) hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index; if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0) -- GitLab From 3070c70a3e46165e4828d519944694694d76a963 Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Fri, 28 Jul 2023 08:35:07 -0400 Subject: [PATCH 1324/2290] drm/amd/display: Fix noise issue on HDMI AV mute [ Upstream commit 69e3be6893a7e668660b05a966bead82bbddb01d ] [Why] When mode switching is triggered there is momentary noise visible on some HDMI TV or displays. [How] Wait for 2 frames to make sure we have enough time to send out AV mute and sink receives a full frame. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu Acked-by: Wayne Lin Signed-off-by: Leo Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 72bec33e371f3..0225b2c96041d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -651,10 +651,20 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) if (pipe_ctx == NULL) return; - if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) + if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) { pipe_ctx->stream_res.stream_enc->funcs->set_avmute( pipe_ctx->stream_res.stream_enc, enable); + + /* Wait for two frame to make sure AV mute is sent out */ + if (enable) { + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + } + } } void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx) -- GitLab From 3d47eb405781cc5127deca9a14e24b27696087a1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Mar 2024 18:43:11 +0100 Subject: [PATCH 1325/2290] dm snapshot: fix lockup in dm_exception_table_exit [ Upstream commit 6e7132ed3c07bd8a6ce3db4bb307ef2852b322dc ] There was reported lockup when we exit a snapshot with many exceptions. Fix this by adding "cond_resched" to the loop that frees the exceptions. Reported-by: John Pittman Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-snap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b748901a4fb55..1c601508ce0b4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -679,8 +679,10 @@ static void dm_exception_table_exit(struct dm_exception_table *et, for (i = 0; i < size; i++) { slot = et->table + i; - hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { kmem_cache_free(mem, ex); + cond_resched(); + } } kvfree(et->table); -- GitLab From 62e06fdd180a61560dc844aac89b9aafde2348f2 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Thu, 14 Mar 2024 14:26:56 +0000 Subject: [PATCH 1326/2290] x86/pm: Work around false positive kmemleak report in msr_build_context() [ Upstream commit e3f269ed0accbb22aa8f25d2daffa23c3fccd407 ] Since: 7ee18d677989 ("x86/power: Make restore_processor_context() sane") kmemleak reports this issue: unreferenced object 0xf68241e0 (size 32): comm "swapper/0", pid 1, jiffies 4294668610 (age 68.432s) hex dump (first 32 bytes): 00 cc cc cc 29 10 01 c0 00 00 00 00 00 00 00 00 ....)........... 00 42 82 f6 cc cc cc cc cc cc cc cc cc cc cc cc .B.............. backtrace: [<461c1d50>] __kmem_cache_alloc_node+0x106/0x260 [] __kmalloc+0x54/0x160 [] msr_build_context.constprop.0+0x35/0x100 [<46635aff>] pm_check_save_msr+0x63/0x80 [<6b6bb938>] do_one_initcall+0x41/0x1f0 [<3f3add60>] kernel_init_freeable+0x199/0x1e8 [<3b538fde>] kernel_init+0x1a/0x110 [<938ae2b2>] ret_from_fork+0x1c/0x28 Which is a false positive. Reproducer: - Run rsync of whole kernel tree (multiple times if needed). - start a kmemleak scan - Note this is just an example: a lot of our internal tests hit these. The root cause is similar to the fix in: b0b592cf0836 x86/pm: Fix false positive kmemleak report in msr_build_context() ie. the alignment within the packed struct saved_context which has everything unaligned as there is only "u16 gs;" at start of struct where in the past there were four u16 there thus aligning everything afterwards. The issue is with the fact that Kmemleak only searches for pointers that are aligned (see how pointers are scanned in kmemleak.c) so when the struct members are not aligned it doesn't see them. Testing: We run a lot of tests with our CI, and after applying this fix we do not see any kmemleak issues any more whilst without it we see hundreds of the above report. From a single, simple test run consisting of 416 individual test cases on kernel 5.10 x86 with kmemleak enabled we got 20 failures due to this, which is quite a lot. With this fix applied we get zero kmemleak related failures. Fixes: 7ee18d677989 ("x86/power: Make restore_processor_context() sane") Signed-off-by: Anton Altaparmakov Signed-off-by: Ingo Molnar Acked-by: "Rafael J. Wysocki" Cc: stable@vger.kernel.org Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240314142656.17699-1-anton@tuxera.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/suspend_32.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index a800abb1a9925..d8416b3bf832e 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,11 +12,6 @@ /* image of the saved processor state */ struct saved_context { - /* - * On x86_32, all segment registers except gs are saved at kernel - * entry in pt_regs. - */ - u16 gs; unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; struct saved_msrs saved_msrs; @@ -27,6 +22,11 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + /* + * On x86_32, all segment registers except gs are saved at kernel + * entry in pt_regs. + */ + u16 gs; bool misc_enable_saved; } __attribute__((packed)); -- GitLab From e82dc11315a7a113dc695e4feca54ecd5f1570b4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Mar 2024 15:21:45 +0100 Subject: [PATCH 1327/2290] cpufreq: brcmstb-avs-cpufreq: fix up "add check for cpufreq_cpu_get's return value" In commit e72160cb6e23 ("cpufreq: brcmstb-avs-cpufreq: add check for cpufreq_cpu_get's return value"), build warnings occur because a variable is created after some logic, resulting in: drivers/cpufreq/brcmstb-avs-cpufreq.c: In function 'brcm_avs_cpufreq_get': drivers/cpufreq/brcmstb-avs-cpufreq.c:486:9: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 486 | struct private_data *priv = policy->driver_data; | ^~~~~~ cc1: all warnings being treated as errors make[2]: *** [scripts/Makefile.build:289: drivers/cpufreq/brcmstb-avs-cpufreq.o] Error 1 make[1]: *** [scripts/Makefile.build:552: drivers/cpufreq] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:1907: drivers] Error 2 Fix this up. Link: https://lore.kernel.org/r/e114d9e5-26af-42be-9baa-72c3a6ec8fe5@oracle.com Link: https://lore.kernel.org/stable/20240327015023.GC7502@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/T/#m15bff0fe96986ef780e848b4fff362bf8ea03f08 Reported-by: Harshit Mogalapalli Reported-by: Linux Kernel Functional Testing Fixes: e72160cb6e23 ("cpufreq: brcmstb-avs-cpufreq: add check for cpufreq_cpu_get's return value") Cc: Anastasia Belova Cc: Viresh Kumar Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 38ec0fedb247f..552db816ed22c 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,10 +481,11 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct private_data *priv; + if (!policy) return 0; - struct private_data *priv = policy->driver_data; - + priv = policy->driver_data; cpufreq_cpu_put(policy); return brcm_avs_get_frequency(priv->base); -- GitLab From 36c676e2ed3609dc8d50ba763eee2ba4e4b493c2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 4 Mar 2024 14:43:55 +0100 Subject: [PATCH 1328/2290] platform/x86: p2sb: On Goldmont only cache P2SB and SPI devfn BAR commit aec7d25b497ce4a8d044e9496de0aa433f7f8f06 upstream. On Goldmont p2sb_bar() only ever gets called for 2 devices, the actual P2SB devfn 13,0 and the SPI controller which is part of the P2SB, devfn 13,2. But the current p2sb code tries to cache BAR0 info for all of devfn 13,0 to 13,7 . This involves calling pci_scan_single_device() for device 13 functions 0-7 and the hw does not seem to like pci_scan_single_device() getting called for some of the other hidden devices. E.g. on an ASUS VivoBook D540NV-GQ065T this leads to continuous ACPI errors leading to high CPU usage. Fix this by only caching BAR0 info and thus only calling pci_scan_single_device() for the P2SB and the SPI controller. Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe") Reported-by: Danil Rybakov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218531 Tested-by: Danil Rybakov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240304134356.305375-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/p2sb.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 17cc4b45e0239..a64f56ddd4a44 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -20,9 +20,11 @@ #define P2SBC_HIDE BIT(8) #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) +#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) +#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) static const struct x86_cpu_id p2sb_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), {} }; @@ -98,21 +100,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) { - unsigned int slot, fn; - - if (PCI_FUNC(devfn) == 0) { - /* - * When function number of the P2SB device is zero, scan it and - * other function numbers, and if devices are available, cache - * their BAR0s. - */ - slot = PCI_SLOT(devfn); - for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) - p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); - } else { - /* Scan the P2SB device and cache its BAR0 */ - p2sb_scan_and_cache_devfn(bus, devfn); - } + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + + /* On Goldmont p2sb_bar() also gets called for the SPI controller */ + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) return -ENOENT; -- GitLab From 196f198ca6fce04ba6ce262f5a0e4d567d7d219d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:20 -0800 Subject: [PATCH 1329/2290] tls: fix race between tx work scheduling and socket close commit e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb upstream. Similarly to previous commit, the submitting thread (recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete(). Reorder scheduling the work before calling complete(). This seems more logical in the first place, as it's the inverse order of what the submitting thread will do. Reported-by: valis Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller [Lee: Fixed merge-conflict in Stable branches linux-6.1.y and older] Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2bd27b77769cb..d53587ff9ddea 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -449,7 +449,6 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct scatterlist *sge; struct sk_msg *msg_en; struct tls_rec *rec; - bool ready = false; struct sock *sk; rec = container_of(aead_req, struct tls_rec, aead_req); @@ -486,19 +485,16 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - - if (!ready) - return; - - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); } static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) -- GitLab From 406b0241d0eb598a0b330ab20ae325537d8d8163 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Mar 2024 14:22:12 +0100 Subject: [PATCH 1330/2290] netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout commit 552705a3650bbf46a22b1adedc1b04181490fc36 upstream. While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0a86c019a75de..4e0a34fbede38 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5132,6 +5132,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); -- GitLab From 72c1efe3f247a581667b7d368fff3bd9a03cd57a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 1 Mar 2024 00:11:10 +0100 Subject: [PATCH 1331/2290] netfilter: nf_tables: disallow anonymous set with timeout flag commit 16603605b667b70da974bea8216c93e7db043bf1 upstream. Anonymous sets are never used with timeout from userspace, reject this. Exception to this rule is NFT_SET_EVAL to ensure legacy meters still work. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4e0a34fbede38..1706605139e36 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4711,6 +4711,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; -- GitLab From 73e4f955b3f8daf0fcc548f945d9bb50024fb1de Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 1 Mar 2024 01:04:11 +0100 Subject: [PATCH 1332/2290] netfilter: nf_tables: reject constant set with timeout commit 5f4fc4bd5cddb4770ab120ce44f02695c4505562 upstream. This set combination is weird: it allows for elements to be added/deleted, but once bound to the rule it cannot be updated anymore. Eventually, all elements expire, leading to an empty set which cannot be updated anymore. Reject this flags combination. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1706605139e36..2a5d9075a081d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4714,6 +4714,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; -- GitLab From 4f34b79c77ec49263e1630ab2d8cacf9e9a3dcba Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 28 Feb 2024 16:45:33 -0800 Subject: [PATCH 1333/2290] Drivers: hv: vmbus: Calculate ring buffer size for more efficient use of memory commit b8209544296edbd1af186e2ea9c648642c37b18c upstream. The VMBUS_RING_SIZE macro adds space for a ring buffer header to the requested ring buffer size. The header size is always 1 page, and so its size varies based on the PAGE_SIZE for which the kernel is built. If the requested ring buffer size is a large power-of-2 size and the header size is small, the resulting size is inefficient in its use of memory. For example, a 512 Kbyte ring buffer with a 4 Kbyte page size results in a 516 Kbyte allocation, which is rounded to up 1 Mbyte by the memory allocator, and wastes 508 Kbytes of memory. In such situations, the exact size of the ring buffer isn't that important, and it's OK to allocate the 4 Kbyte header at the beginning of the 512 Kbytes, leaving the ring buffer itself with just 508 Kbytes. The memory allocation can be 512 Kbytes instead of 1 Mbyte and nothing is wasted. Update VMBUS_RING_SIZE to implement this approach for "large" ring buffer sizes. "Large" is somewhat arbitrarily defined as 8 times the size of the ring buffer header (which is of size PAGE_SIZE). For example, for 4 Kbyte PAGE_SIZE, ring buffers of 32 Kbytes and larger use the first 4 Kbytes as the ring buffer header. For 64 Kbyte PAGE_SIZE, ring buffers of 512 Kbytes and larger use the first 64 Kbytes as the ring buffer header. In both cases, smaller sizes add space for the header so the ring size isn't reduced too much by using part of the space for the header. For example, with a 64 Kbyte page size, we don't want a 128 Kbyte ring buffer to be reduced to 64 Kbytes by allocating half of the space for the header. In such a case, the memory allocation is less efficient, but it's the best that can be done. While the new algorithm slightly changes the amount of space allocated for ring buffers by drivers that use VMBUS_RING_SIZE, the devices aren't known to be sensitive to small changes in ring buffer size, so there shouldn't be any effect. Fixes: c1135c7fd0e9 ("Drivers: hv: vmbus: Introduce types of GPADL") Fixes: 6941f67ad37d ("hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218502 Cc: stable@vger.kernel.org Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Reviewed-by: Dexuan Cui Tested-by: Souradeep Chakrabarti Link: https://lore.kernel.org/r/20240229004533.313662-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240229004533.313662-1-mhklinux@outlook.com> Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 646f1da9f27e0..4fbd5d8417111 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { -- GitLab From 408837d7ef6698e3cec90ff9ccecb18e66880531 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 21 Feb 2024 14:46:21 -0700 Subject: [PATCH 1334/2290] xfrm: Avoid clang fortify warning in copy_to_user_tmpl() commit 1a807e46aa93ebad1dfbed4f82dc3bf779423a6e upstream. After a couple recent changes in LLVM, there is a warning (or error with CONFIG_WERROR=y or W=e) from the compile time fortify source routines, specifically the memset() in copy_to_user_tmpl(). In file included from net/xfrm/xfrm_user.c:14: ... include/linux/fortify-string.h:438:4: error: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror,-Wattribute-warning] 438 | __write_overflow_field(p_size_field, size); | ^ 1 error generated. While ->xfrm_nr has been validated against XFRM_MAX_DEPTH when its value is first assigned in copy_templates() by calling validate_tmpl() first (so there should not be any issue in practice), LLVM/clang cannot really deduce that across the boundaries of these functions. Without that knowledge, it cannot assume that the loop stops before i is greater than XFRM_MAX_DEPTH, which would indeed result a stack buffer overflow in the memset(). To make the bounds of ->xfrm_nr clear to the compiler and add additional defense in case copy_to_user_tmpl() is ever used in a path where ->xfrm_nr has not been properly validated against XFRM_MAX_DEPTH first, add an explicit bound check and early return, which clears up the warning. Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1985 Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d042ca01211fa..0cc4ed29e9015 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1979,6 +1979,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) if (xp->xfrm_nr == 0) return 0; + if (xp->xfrm_nr > XFRM_MAX_DEPTH) + return -ENOBUFS; + for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; -- GitLab From d24a79ccdf25d35ed9515483225fabcdba1e24c4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Feb 2024 09:08:27 -0800 Subject: [PATCH 1335/2290] init/Kconfig: lower GCC version check for -Warray-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3e00f5802fabf2f504070a591b14b648523ede13 upstream. We continue to see false positives from -Warray-bounds even in GCC 10, which is getting reported in a few places[1] still: security/security.c:811:2: warning: `memcpy' offset 32 is out of the bounds [0, 0] [-Warray-bounds] Lower the GCC version check from 11 to 10. Link: https://lkml.kernel.org/r/20240223170824.work.768-kees@kernel.org Reported-by: Lu Yao Closes: https://lore.kernel.org/lkml/20240117014541.8887-1-yaolu@kylinos.cn/ Link: https://lore.kernel.org/linux-next/65d84438.620a0220.7d171.81a7@mx.google.com [1] Signed-off-by: Kees Cook Reviewed-by: Paul Moore Cc: Ard Biesheuvel Cc: Christophe Leroy Cc: Greg Kroah-Hartman Cc: "Gustavo A. R. Silva" Cc: Johannes Weiner Cc: Marc Aurèle La France Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nhat Pham Cc: Petr Mladek Cc: Randy Dunlap Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index ffb927bf6034f..b63dce6706c5c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -902,14 +902,14 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-11+ array-bounds globally. +# Currently, disable gcc-10+ array-bounds globally. # It's still broken in gcc-13, so no upper bound yet. -config GCC11_NO_ARRAY_BOUNDS +config GCC10_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS # # For architectures that know their GCC __int128 support is sound -- GitLab From 726374dde5d608b15b9756bd52b6fc283fda7a06 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 14 Feb 2024 17:00:03 -0800 Subject: [PATCH 1336/2290] KVM: x86: Mark target gfn of emulated atomic instruction as dirty commit 910c57dfa4d113aae6571c2a8b9ae8c430975902 upstream. When emulating an atomic access on behalf of the guest, mark the target gfn dirty if the CMPXCHG by KVM is attempted and doesn't fault. This fixes a bug where KVM effectively corrupts guest memory during live migration by writing to guest memory without informing userspace that the page is dirty. Marking the page dirty got unintentionally dropped when KVM's emulated CMPXCHG was converted to do a user access. Before that, KVM explicitly mapped the guest page into kernel memory, and marked the page dirty during the unmap phase. Mark the page dirty even if the CMPXCHG fails, as the old data is written back on failure, i.e. the page is still written. The value written is guaranteed to be the same because the operation is atomic, but KVM's ABI is that all writes are dirty logged regardless of the value written. And more importantly, that's what KVM did before the buggy commit. Huge kudos to the folks on the Cc list (and many others), who did all the actual work of triaging and debugging. Fixes: 1c2361f667f3 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses") Cc: stable@vger.kernel.org Cc: David Matlack Cc: Pasha Tatashin Cc: Michael Krebs base-commit: 6769ea8da8a93ed4630f1ce64df6aafcaabfce64 Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20240215010004.1456078-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 688bc7b72eb66..0e6e63a8f0949 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7758,6 +7758,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (r < 0) return X86EMUL_UNHANDLEABLE; + + /* + * Mark the page dirty _before_ checking whether or not the CMPXCHG was + * successful, as the old value is written back on failure. Note, for + * live migration, this is unnecessarily conservative as CMPXCHG writes + * back the original value and the access is atomic, but KVM's ABI is + * that all writes are dirty logged, regardless of the value written. + */ + kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa)); + if (r) return X86EMUL_CMPXCHG_FAILED; -- GitLab From 4868c0ecdb6cfde7c70cf478c46e06bb9c7e5865 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 16 Feb 2024 17:34:30 -0800 Subject: [PATCH 1337/2290] KVM: SVM: Flush pages under kvm->lock to fix UAF in svm_register_enc_region() commit 5ef1d8c1ddbf696e47b226e11888eaf8d9e8e807 upstream. Do the cache flush of converted pages in svm_register_enc_region() before dropping kvm->lock to fix use-after-free issues where region and/or its array of pages could be freed by a different task, e.g. if userspace has __unregister_enc_region_locked() already queued up for the region. Note, the "obvious" alternative of using local variables doesn't fully resolve the bug, as region->pages is also dynamically allocated. I.e. the region structure itself would be fine, but region->pages could be freed. Flushing multiple pages under kvm->lock is unfortunate, but the entire flow is a rare slow path, and the manual flush is only needed on CPUs that lack coherency for encrypted memory. Fixes: 19a23da53932 ("Fix unsynchronized access to sev members through svm_register_enc_region") Reported-by: Gabe Kirkpatrick Cc: Josh Eads Cc: Peter Gonda Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20240217013430.2079561-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/sev.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3060fe4e9731a..3dc0ee1fe9db9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1958,20 +1958,22 @@ int sev_mem_enc_register_region(struct kvm *kvm, goto e_free; } - region->uaddr = range->addr; - region->size = range->size; - - list_add_tail(®ion->list, &sev->regions_list); - mutex_unlock(&kvm->lock); - /* * The guest may change the memory encryption attribute from C=0 -> C=1 * or vice versa for this memory range. Lets make sure caches are * flushed to ensure that guest data gets written into memory with - * correct C-bit. + * correct C-bit. Note, this must be done before dropping kvm->lock, + * as region and its array of pages can be freed by a different task + * once kvm->lock is released. */ sev_clflush_pages(region->pages, region->npages); + region->uaddr = range->addr; + region->size = range->size; + + list_add_tail(®ion->list, &sev->regions_list); + mutex_unlock(&kvm->lock); + return ret; e_free: -- GitLab From 3a70b1c9326457c7bd93974d77b2ba55a742e742 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Mar 2024 15:24:05 -0500 Subject: [PATCH 1338/2290] tracing: Use .flush() call to wake up readers commit e5d7c1916562f0e856eb3d6f569629fcd535fed2 upstream. The .release() function does not get called until all readers of a file descriptor are finished. If a thread is blocked on reading a file descriptor in ring_buffer_wait(), and another thread closes the file descriptor, it will not wake up the other thread as ring_buffer_wake_waiters() is called by .release(), and that will not get called until the .read() is finished. The issue originally showed up in trace-cmd, but the readers are actually other processes with their own file descriptors. So calling close() would wake up the other tasks because they are blocked on another descriptor then the one that was closed(). But there's other wake ups that solve that issue. When a thread is blocked on a read, it can still hang even when another thread closed its descriptor. This is what the .flush() callback is for. Have the .flush() wake up the readers. Link: https://lore.kernel.org/linux-trace-kernel/20240308202432.107909457@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Linus Torvalds Cc: linke li Cc: Rabin Vincent Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f667d6bdddda5..f2b00ea38111a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8278,6 +8278,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return size; } +static int tracing_buffers_flush(struct file *file, fl_owner_t id) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + + iter->wait_index++; + /* Make sure the waiters see the new wait_index */ + smp_wmb(); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + return 0; +} + static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; @@ -8289,12 +8303,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); - iter->wait_index++; - /* Make sure the waiters see the new wait_index */ - smp_wmb(); - - ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); @@ -8508,6 +8516,7 @@ static const struct file_operations tracing_buffers_fops = { .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = tracing_buffers_release, + .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, .llseek = no_llseek, -- GitLab From 7f122486a06107438cd58aa32ab4a55c2bb60a3c Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 1 Mar 2024 15:36:58 +0800 Subject: [PATCH 1339/2290] drm/amdgpu/pm: Fix the error of pwm1_enable setting commit 0dafaf659cc463f2db0af92003313a8bc46781cd upstream. Fix the pwm_mode value error which used for pwm1_enable setting Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 29f3d8431089e..cdb406690b7e7 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2344,6 +2344,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); int err, ret; + u32 pwm_mode; int value; if (amdgpu_in_reset(adev)) @@ -2355,13 +2356,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, if (err) return err; + if (value == 0) + pwm_mode = AMD_FAN_CTRL_NONE; + else if (value == 1) + pwm_mode = AMD_FAN_CTRL_MANUAL; + else if (value == 2) + pwm_mode = AMD_FAN_CTRL_AUTO; + else + return -EINVAL; + ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (ret < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return ret; } - ret = amdgpu_dpm_set_fan_control_mode(adev, value); + ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); -- GitLab From 2eadf1adf513f798172c50fccbaf6ce037ac7e9b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 19 Feb 2024 13:50:47 +0100 Subject: [PATCH 1340/2290] drm/i915: Check before removing mm notifier commit 01bb1ae35006e473138c90711bad1a6b614a1823 upstream. Error in mmu_interval_notifier_insert() can leave a NULL notifier.mm pointer. Catch that and return early. Fixes: ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.") Cc: # v5.13+ [tursulin: Added Fixes and cc stable.] Cc: Andi Shyti Cc: Shawn Lee Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240219125047.28906-1-nirmoy.das@intel.com Signed-off-by: Tvrtko Ursulin (cherry picked from commit db7bbd13f08774cde0332c705f042e327fe21e73) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index ba14b18d65f38..2e7c52c2e47dd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -378,6 +378,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj) { GEM_WARN_ON(obj->userptr.page_ref); + if (!obj->userptr.notifier.mm) + return; + mmu_interval_notifier_remove(&obj->userptr.notifier); obj->userptr.notifier.mm = NULL; } -- GitLab From e5b3ad3e8ee4cc88870353a47d6a14661fe1566b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 1 Mar 2024 15:29:50 +0800 Subject: [PATCH 1341/2290] ALSA: hda/realtek - Fix headset Mic no show at resume back for Lenovo ALC897 platform commit d397b6e56151099cf3b1f7bfccb204a6a8591720 upstream. Headset Mic will no show at resume back. This patch will fix this issue. Fixes: d7f32791a9fc ("ALSA: hda/realtek - Add headset Mic support for Lenovo ALC897 platform") Cc: Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/4713d48a372e47f98bba0c6120fd8254@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6e759032eba2e..0b58536fc15bb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11439,8 +11439,7 @@ static void alc897_hp_automute_hook(struct hda_codec *codec, snd_hda_gen_hp_automute(codec, jack); vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; - snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - vref); + snd_hda_set_pin_ctl(codec, 0x1b, vref); } static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, @@ -11449,6 +11448,10 @@ static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->gen.hp_automute_hook = alc897_hp_automute_hook; + spec->no_shutup_pins = 1; + } + if (action == HDA_FIXUP_ACT_PROBE) { + snd_hda_set_pin_ctl_cache(codec, 0x1a, PIN_IN | AC_PINCTL_VREF_100); } } -- GitLab From f42ba916689f5c7b1642092266d2f53cf527aaaa Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 29 Feb 2024 14:30:06 -0500 Subject: [PATCH 1342/2290] USB: usb-storage: Prevent divide-by-0 error in isd200_ata_command commit 014bcf41d946b36a8f0b8e9b5d9529efbb822f49 upstream. The isd200 sub-driver in usb-storage uses the HEADS and SECTORS values in the ATA ID information to calculate cylinder and head values when creating a CDB for READ or WRITE commands. The calculation involves division and modulus operations, which will cause a crash if either of these values is 0. While this never happens with a genuine device, it could happen with a flawed or subversive emulation, as reported by the syzbot fuzzer. Protect against this possibility by refusing to bind to the device if either the ATA_ID_HEADS or ATA_ID_SECTORS value in the device's ID information is 0. This requires isd200_Initialization() to return a negative error code when initialization fails; currently it always returns 0 (even when there is an error). Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+28748250ab47a8f04100@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-usb/0000000000003eb868061245ba7f@google.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: PrasannaKumar Muralidharan Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/b1e605ea-333f-4ac0-9511-da04f411763e@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/isd200.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 4e0eef1440b7f..300aeef160e75 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1105,7 +1105,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id) static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; - int retStatus = ISD200_GOOD; + int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); @@ -1137,6 +1137,13 @@ static int isd200_get_inquiry_data( struct us_data *us ) isd200_fix_driveid(id); isd200_dump_driveid(us, id); + /* Prevent division by 0 in isd200_scsi_to_ata() */ + if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { + usb_stor_dbg(us, " Invalid ATA Identify data\n"); + retStatus = ISD200_ERROR; + goto Done; + } + memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ @@ -1202,6 +1209,7 @@ static int isd200_get_inquiry_data( struct us_data *us ) } } + Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); @@ -1481,22 +1489,27 @@ static int isd200_init_info(struct us_data *us) static int isd200_Initialization(struct us_data *us) { + int rc = 0; + usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ - if (isd200_init_info(us) == ISD200_ERROR) { + if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); + rc = -ENOMEM; } else { /* Get device specific data */ - if (isd200_get_inquiry_data(us) != ISD200_GOOD) + if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); - else + rc = -EINVAL; + } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); + } } - return 0; + return rc; } -- GitLab From 7664ee8bd80309b90d53488b619764f0a057f2b7 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 28 Feb 2024 17:24:41 +0530 Subject: [PATCH 1343/2290] usb: gadget: ncm: Fix handling of zero block length packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f90ce1e04cbcc76639d6cba0fdbd820cd80b3c70 upstream. While connecting to a Linux host with CDC_NCM_NTB_DEF_SIZE_TX set to 65536, it has been observed that we receive short packets, which come at interval of 5-10 seconds sometimes and have block length zero but still contain 1-2 valid datagrams present. According to the NCM spec: "If wBlockLength = 0x0000, the block is terminated by a short packet. In this case, the USB transfer must still be shorter than dwNtbInMaxSize or dwNtbOutMaxSize. If exactly dwNtbInMaxSize or dwNtbOutMaxSize bytes are sent, and the size is a multiple of wMaxPacketSize for the given pipe, then no ZLP shall be sent. wBlockLength= 0x0000 must be used with extreme care, because of the possibility that the host and device may get out of sync, and because of test issues. wBlockLength = 0x0000 allows the sender to reduce latency by starting to send a very large NTB, and then shortening it when the sender discovers that there’s not sufficient data to justify sending a large NTB" However, there is a potential issue with the current implementation, as it checks for the occurrence of multiple NTBs in a single giveback by verifying if the leftover bytes to be processed is zero or not. If the block length reads zero, we would process the same NTB infintely because the leftover bytes is never zero and it leads to a crash. Fix this by bailing out if block length reads zero. Cc: stable@vger.kernel.org Fixes: 427694cfaafa ("usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240228115441.2105585-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 5e78fcc63e4d3..14601a2d25427 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1348,7 +1348,7 @@ parse_ntb: if (to_process == 1 && (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { to_process--; - } else if (to_process > 0) { + } else if ((to_process > 0) && (block_len != 0)) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } -- GitLab From ccaa4fb8dc4f2fac50aa68698e915fc872b133de Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 23 Feb 2024 01:33:43 +0200 Subject: [PATCH 1344/2290] usb: port: Don't try to peer unused USB ports based on location commit 69c63350e573367f9c8594162288cffa8a26d0d1 upstream. Unused USB ports may have bogus location data in ACPI PLD tables. This causes port peering failures as these unused USB2 and USB3 ports location may match. Due to these failures the driver prints a "usb: port power management may be unreliable" warning, and unnecessarily blocks port power off during runtime suspend. This was debugged on a couple DELL systems where the unused ports all returned zeroes in their location data. Similar bugreports exist for other systems. Don't try to peer or match ports that have connect type set to USB_PORT_NOT_USED. Fixes: 3bfd659baec8 ("usb: find internal hub tier mismatch via acpi") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218465 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218486 Tested-by: Paul Menzel Link: https://lore.kernel.org/linux-usb/5406d361-f5b7-4309-b0e6-8c94408f7d75@molgen.mpg.de Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Mathias Nyman Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218490 Link: https://lore.kernel.org/r/20240222233343.71856-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 38c1a4f4fdeae..17aef216cb501 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -534,7 +534,7 @@ static int match_location(struct usb_device *peer_hdev, void *p) struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); - if (!peer_hub) + if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); @@ -545,7 +545,8 @@ static int match_location(struct usb_device *peer_hdev, void *p) for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; - if (peer && peer->location == port_dev->location) { + if (peer && peer->connect_type != USB_PORT_NOT_USED && + peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } -- GitLab From d40ab69217481d938b3434f5218032f59a58b935 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Tue, 5 Mar 2024 09:57:06 +0800 Subject: [PATCH 1345/2290] tty: serial: fsl_lpuart: avoid idle preamble pending if CTS is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 74cb7e0355fae9641f825afa389d3fba3b617714 upstream. If the remote uart device is not connected or not enabled after booting up, the CTS line is high by default. At this time, if we enable the flow control when opening the device(for example, using “stty -F /dev/ttyLP4 crtscts” command), there will be a pending idle preamble(first writing 0 and then writing 1 to UARTCTRL_TE will queue an idle preamble) that cannot be sent out, resulting in the uart port fail to close(waiting for TX empty), so the user space stty will have to wait for a long time or forever. This is an LPUART IP bug(idle preamble has higher priority than CTS), here add a workaround patch to enable TX CTS after enabling UARTCTRL_TE, so that the idle preamble does not get stuck due to CTS is deasserted. Fixes: 380c966c093e ("tty: serial: fsl_lpuart: add 32-bit register interface support") Cc: stable Signed-off-by: Sherry Sun Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240305015706.1050769-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c5a9b89c4d313..f94c782638686 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2213,9 +2213,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, lpuart32_write(&sport->port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); - lpuart32_write(&sport->port, modem, UARTMODIR); - lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ + lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ + lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* re-enable the CTS if needed */ + lpuart32_write(&sport->port, modem, UARTMODIR); if ((ctrl & (UARTCTRL_PE | UARTCTRL_M)) == UARTCTRL_PE) sport->is_cs7 = true; -- GitLab From 4154e767354140db7804207117e7238fb337b0e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Feb 2024 20:00:35 +0100 Subject: [PATCH 1346/2290] misc: lis3lv02d_i2c: Fix regulators getting en-/dis-abled twice on suspend/resume commit ac3e0384073b2408d6cb0d972fee9fcc3776053d upstream. When not configured for wakeup lis3lv02d_i2c_suspend() will call lis3lv02d_poweroff() even if the device has already been turned off by the runtime-suspend handler and if configured for wakeup and the device is runtime-suspended at this point then it is not turned back on to serve as a wakeup source. Before commit b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback"), lis3lv02d_poweroff() failed to disable the regulators which as a side effect made calling poweroff() twice ok. Now that poweroff() correctly disables the regulators, doing this twice triggers a WARN() in the regulator core: unbalanced disables for regulator-dummy WARNING: CPU: 1 PID: 92 at drivers/regulator/core.c:2999 _regulator_disable ... Fix lis3lv02d_i2c_suspend() to not call poweroff() a second time if already runtime-suspended and add a poweron() call when necessary to make wakeup work. lis3lv02d_i2c_resume() has similar issues, with an added weirness that it always powers on the device if it is runtime suspended, after which the first runtime-resume will call poweron() again, causing the enabled count for the regulator to increase by 1 every suspend/resume. These unbalanced regulator_enable() calls cause the regulator to never be turned off and trigger the following WARN() on driver unbind: WARNING: CPU: 1 PID: 1724 at drivers/regulator/core.c:2396 _regulator_put Fix this by making lis3lv02d_i2c_resume() mirror the new suspend(). Fixes: b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback") Reported-by: Paul Menzel Closes: https://lore.kernel.org/regressions/5fc6da74-af0a-4aac-b4d5-a000b39a63a5@molgen.mpg.de/ Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Signed-off-by: Hans de Goede Tested-by: Paul Menzel # Dell XPS 15 7590 Reviewed-by: Paul Menzel Link: https://lore.kernel.org/r/20240220190035.53402-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index fdec2c30eb165..63c717053e36b 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -199,8 +199,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - if (!lis3->pdata || !lis3->pdata->wakeup_flags) + /* Turn on for wakeup if turned off by runtime suspend */ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweron(lis3); + /* For non wakeup turn off if not already turned off by runtime suspend */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweroff(lis3); + return 0; } @@ -209,13 +215,12 @@ static int lis3lv02d_i2c_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - /* - * pm_runtime documentation says that devices should always - * be powered on at resume. Pm_runtime turns them off after system - * wide resume is complete. - */ - if (!lis3->pdata || !lis3->pdata->wakeup_flags || - pm_runtime_suspended(dev)) + /* Turn back off if turned on for wakeup and runtime suspended*/ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweroff(lis3); + /* For non wakeup turn back on if not runtime suspended */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweron(lis3); return 0; -- GitLab From 2d56aca54847770fe847a0cf31df031a6f9e66c9 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 11 Feb 2024 12:39:11 +0200 Subject: [PATCH 1347/2290] mei: me: add arrow lake point S DID commit 7a9b9012043e126f6d6f4683e67409312d1b707b upstream. Add Arrow Lake S device id. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240211103912.117105-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bdc65d50b945f..fd598f4ec774a 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -112,6 +112,7 @@ #define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ +#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 5bf0d50d55a00..d2ada09019c90 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -119,6 +119,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } -- GitLab From 99cd2d491d1c5b72b6430925f4da9ba8ed326463 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 11 Feb 2024 12:39:12 +0200 Subject: [PATCH 1348/2290] mei: me: add arrow lake point H DID commit 8436f25802ec028ac7254990893f3e01926d9b79 upstream. Add Arrow Lake H device id. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240211103912.117105-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index fd598f4ec774a..3390ff5111033 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -113,6 +113,7 @@ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ #define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ +#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d2ada09019c90..f8219cbd2c7ce 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -120,6 +120,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } -- GitLab From 994a1e583c0c206c8ca7d03334a65b79f4d8bc51 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 29 Feb 2024 17:15:27 -0500 Subject: [PATCH 1349/2290] vt: fix unicode buffer corruption when deleting characters commit 1581dafaf0d34bc9c428a794a22110d7046d186d upstream. This is the same issue that was fixed for the VGA text buffer in commit 39cdb68c64d8 ("vt: fix memory overlapping when deleting chars in the buffer"). The cure is also the same i.e. replace memcpy() with memmove() due to the overlaping buffers. Signed-off-by: Nicolas Pitre Fixes: 81732c3b2fed ("tty vt: Fix line garbage in virtual console on command line edition") Cc: stable Link: https://lore.kernel.org/r/sn184on2-3p0q-0qrq-0218-895349s4753o@syhkavp.arg Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9e30ef2b6eb8c..48a9ed7c93c97 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -398,7 +398,7 @@ static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) char32_t *ln = uniscr->lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; - memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); + memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } -- GitLab From 18d5fc3c16cc317bd0e5f5dabe0660df415cadb7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 4 Mar 2024 15:57:15 -0800 Subject: [PATCH 1350/2290] fs/aio: Check IOCB_AIO_RW before the struct aio_kiocb conversion commit 961ebd120565cb60cebe21cb634fbc456022db4a upstream. The first kiocb_set_cancel_fn() argument may point at a struct kiocb that is not embedded inside struct aio_kiocb. With the current code, depending on the compiler, the req->ki_ctx read happens either before the IOCB_AIO_RW test or after that test. Move the req->ki_ctx read such that it is guaranteed that the IOCB_AIO_RW test happens first. Reported-by: Eric Biggers Cc: Benjamin LaHaise Cc: Eric Biggers Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Fixes: b820de741ae4 ("fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240304235715.3790858-1-bvanassche@acm.org Reviewed-by: Jens Axboe Reviewed-by: Eric Biggers Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 849c3e3ed558b..3e3bf6fdc5ab6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -591,8 +591,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); - struct kioctx *ctx = req->ki_ctx; + struct aio_kiocb *req; + struct kioctx *ctx; unsigned long flags; /* @@ -602,9 +602,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) if (!(iocb->ki_flags & IOCB_AIO_RW)) return; + req = container_of(iocb, struct aio_kiocb, rw); + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; + ctx = req->ki_ctx; + spin_lock_irqsave(&ctx->ctx_lock, flags); list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; -- GitLab From 635e23035dd497fc408eea6fa5b0c22cc035d503 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 1 Mar 2024 15:04:02 +0800 Subject: [PATCH 1351/2290] ALSA: hda/realtek - Add Headset Mic supported Acer NB platform commit 34ab5bbc6e82214d7f7393eba26d164b303ebb4e upstream. It will be enable headset Mic for Acer NB platform. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/fe0eb6661ca240f3b7762b5b3257710d@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0b58536fc15bb..5e8acb96ab2b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10750,6 +10750,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { * at most one tbl is allowed to define for the same vendor and same codec */ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1025, "Acer", ALC2XX_FIXUP_HEADSET_MIC, + {0x19, 0x40000000}), SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1b, 0x40000000}), -- GitLab From 2cfa00b0e87f46c8a9b3087beabc18b3c0606984 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Mon, 4 Mar 2024 21:40:32 +0800 Subject: [PATCH 1352/2290] ALSA: hda/realtek: fix mute/micmute LEDs for HP EliteBook commit a17bd44c0146b00fcaa692915789c16bd1fb2a81 upstream. The HP EliteBook using ALC236 codec which using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240304134033.773348-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5e8acb96ab2b4..fb12034d464ee 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9792,6 +9792,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c90, "HP EliteBook 640", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), -- GitLab From e5b5948c769aa1ebf962dddfb972f87d8f166f95 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 1 Mar 2024 20:07:31 +0530 Subject: [PATCH 1353/2290] tee: optee: Fix kernel panic caused by incorrect error handling commit 95915ba4b987cf2b222b0f251280228a1ff977ac upstream. The error path while failing to register devices on the TEE bus has a bug leading to kernel panic as follows: [ 15.398930] Unable to handle kernel paging request at virtual address ffff07ed00626d7c [ 15.406913] Mem abort info: [ 15.409722] ESR = 0x0000000096000005 [ 15.413490] EC = 0x25: DABT (current EL), IL = 32 bits [ 15.418814] SET = 0, FnV = 0 [ 15.421878] EA = 0, S1PTW = 0 [ 15.425031] FSC = 0x05: level 1 translation fault [ 15.429922] Data abort info: [ 15.432813] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 15.438310] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 15.443372] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 15.448697] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000d9e3e000 [ 15.455413] [ffff07ed00626d7c] pgd=1800000bffdf9003, p4d=1800000bffdf9003, pud=0000000000000000 [ 15.464146] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Commit 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") lead to the introduction of this bug. So fix it appropriately. Reported-by: Mikko Rapeli Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218542 Fixes: 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Signed-off-by: Jens Wiklander Signed-off-by: Greg Kroah-Hartman --- drivers/tee/optee/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 4b10921276942..1892e49a8e6a6 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) -- GitLab From 155a3d8d8f77d0aa969015336c4747554305704e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 21 Feb 2024 12:43:58 +0100 Subject: [PATCH 1354/2290] mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations commit 803de9000f334b771afacb6ff3e78622916668b0 upstream. Sven reports an infinite loop in __alloc_pages_slowpath() for costly order __GFP_RETRY_MAYFAIL allocations that are also GFP_NOIO. Such combination can happen in a suspend/resume context where a GFP_KERNEL allocation can have __GFP_IO masked out via gfp_allowed_mask. Quoting Sven: 1. try to do a "costly" allocation (order > PAGE_ALLOC_COSTLY_ORDER) with __GFP_RETRY_MAYFAIL set. 2. page alloc's __alloc_pages_slowpath tries to get a page from the freelist. This fails because there is nothing free of that costly order. 3. page alloc tries to reclaim by calling __alloc_pages_direct_reclaim, which bails out because a zone is ready to be compacted; it pretends to have made a single page of progress. 4. page alloc tries to compact, but this always bails out early because __GFP_IO is not set (it's not passed by the snd allocator, and even if it were, we are suspending so the __GFP_IO flag would be cleared anyway). 5. page alloc believes reclaim progress was made (because of the pretense in item 3) and so it checks whether it should retry compaction. The compaction retry logic thinks it should try again, because: a) reclaim is needed because of the early bail-out in item 4 b) a zonelist is suitable for compaction 6. goto 2. indefinite stall. (end quote) The immediate root cause is confusing the COMPACT_SKIPPED returned from __alloc_pages_direct_compact() (step 4) due to lack of __GFP_IO to be indicating a lack of order-0 pages, and in step 5 evaluating that in should_compact_retry() as a reason to retry, before incrementing and limiting the number of retries. There are however other places that wrongly assume that compaction can happen while we lack __GFP_IO. To fix this, introduce gfp_compaction_allowed() to abstract the __GFP_IO evaluation and switch the open-coded test in try_to_compact_pages() to use it. Also use the new helper in: - compaction_ready(), which will make reclaim not bail out in step 3, so there's at least one attempt to actually reclaim, even if chances are small for a costly order - in_reclaim_compaction() which will make should_continue_reclaim() return false and we don't over-reclaim unnecessarily - in __alloc_pages_slowpath() to set a local variable can_compact, which is then used to avoid retrying reclaim/compaction for costly allocations (step 5) if we can't compact and also to skip the early compaction attempt that we do in some cases Link: https://lkml.kernel.org/r/20240221114357.13655-2-vbabka@suse.cz Fixes: 3250845d0526 ("Revert "mm, oom: prevent premature OOM killer invocation for high order request"") Signed-off-by: Vlastimil Babka Reported-by: Sven van Ashbrook Closes: https://lore.kernel.org/all/CAG-rBihs_xMKb3wrMO1%2B-%2Bp4fowP9oy1pa_OTkfxBzPUVOZF%2Bg@mail.gmail.com/ Tested-by: Karthikeyan Ramasubramanian Cc: Brian Geffon Cc: Curtis Malainey Cc: Jaroslav Kysela Cc: Mel Gorman Cc: Michal Hocko Cc: Takashi Iwai Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/gfp.h | 9 +++++++++ mm/compaction.c | 7 +------ mm/page_alloc.c | 10 ++++++---- mm/vmscan.c | 5 ++++- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 65a78773dccad..e2ccb47c42643 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -341,6 +341,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. + */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_PM_SLEEP diff --git a/mm/compaction.c b/mm/compaction.c index 8238e83385a79..23af5f3b2ccaf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2570,16 +2570,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, struct page **capture) { - int may_perform_io = (__force int)(gfp_mask & __GFP_IO); struct zoneref *z; struct zone *zone; enum compact_result rc = COMPACT_SKIPPED; - /* - * Check if the GFP flags allow compaction - GFP_NOIO is really - * tricky context because the migration might require IO - */ - if (!may_perform_io) + if (!gfp_compaction_allowed(gfp_mask)) return COMPACT_SKIPPED; trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c783806eefc9f..a7537da43bd45 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5012,6 +5012,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; + bool can_compact = gfp_compaction_allowed(gfp_mask); const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER; struct page *page = NULL; unsigned int alloc_flags; @@ -5090,7 +5091,7 @@ restart: * Don't try this for allocations that are allowed to ignore * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. */ - if (can_direct_reclaim && + if (can_direct_reclaim && can_compact && (costly_order || (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) && !gfp_pfmemalloc_allowed(gfp_mask)) { @@ -5188,9 +5189,10 @@ retry: /* * Do not retry costly high order allocations unless they are - * __GFP_RETRY_MAYFAIL + * __GFP_RETRY_MAYFAIL and we can compact */ - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) + if (costly_order && (!can_compact || + !(gfp_mask & __GFP_RETRY_MAYFAIL))) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -5203,7 +5205,7 @@ retry: * implementation of the compaction depends on the sufficient amount * of free memory (see __compaction_suitable) */ - if (did_some_progress > 0 && + if (did_some_progress > 0 && can_compact && should_compact_retry(ac, order, alloc_flags, compact_result, &compact_priority, &compaction_retries)) diff --git a/mm/vmscan.c b/mm/vmscan.c index 9f3cfb7caa48d..a3b1d8e5dbb3d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6024,7 +6024,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + if (gfp_compaction_allowed(sc->gfp_mask) && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || sc->priority < DEF_PRIORITY - 2)) return true; @@ -6266,6 +6266,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) unsigned long watermark; enum compact_result suitable; + if (!gfp_compaction_allowed(sc->gfp_mask)) + return false; + suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); if (suitable == COMPACT_SUCCESS) /* Allocation should succeed already. Don't reclaim. */ -- GitLab From 5df4c386d35efafdcd2d5d344c749daca649c59b Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 05:36:50 +0200 Subject: [PATCH 1355/2290] iio: accel: adxl367: fix DEVID read after reset commit 1b926914bbe4e30cb32f268893ef7d82a85275b8 upstream. regmap_read_poll_timeout() will not sleep before reading, causing the first read to return -ENXIO on I2C, since the chip does not respond to it while it is being reset. The datasheet specifies that a soft reset operation has a latency of 7.5ms. Add a 15ms sleep between reset and reading the DEVID register, and switch to a simple regmap_read() call. Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207033657.206171-1-demonsingur@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/adxl367.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 7c7d780407937..f1a41b92543af 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1444,9 +1444,11 @@ static int adxl367_verify_devid(struct adxl367_state *st) unsigned int val; int ret; - ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val, - val == ADXL367_DEVID_AD, 1000, 10000); + ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val); if (ret) + return dev_err_probe(st->dev, ret, "Failed to read dev id\n"); + + if (val != ADXL367_DEVID_AD) return dev_err_probe(st->dev, -ENODEV, "Invalid dev id 0x%02X, expected 0x%02X\n", val, ADXL367_DEVID_AD); @@ -1543,6 +1545,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, if (ret) return ret; + fsleep(15000); + ret = adxl367_verify_devid(st); if (ret) return ret; -- GitLab From d50fd2f24f3e6e7f39a45d6060a249c9c0f06583 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 05:36:51 +0200 Subject: [PATCH 1356/2290] iio: accel: adxl367: fix I2C FIFO data register commit 11dadb631007324c7a8bcb2650eda88ed2b9eed0 upstream. As specified in the datasheet, the I2C FIFO data register is 0x18, not 0x42. 0x42 was used by mistake when adapting the ADXL372 driver. Fix this mistake. Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207033657.206171-2-demonsingur@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/adxl367_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c index 3606efa25835e..5c040915276d1 100644 --- a/drivers/iio/accel/adxl367_i2c.c +++ b/drivers/iio/accel/adxl367_i2c.c @@ -11,7 +11,7 @@ #include "adxl367.h" -#define ADXL367_I2C_FIFO_DATA 0x42 +#define ADXL367_I2C_FIFO_DATA 0x18 struct adxl367_i2c_state { struct regmap *regmap; -- GitLab From 43fe5dc9faa09d30aaff587ed1b380019eec9619 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 4 Mar 2024 21:31:06 +0100 Subject: [PATCH 1357/2290] i2c: i801: Avoid potential double call to gpiod_remove_lookup_table commit ceb013b2d9a2946035de5e1827624edc85ae9484 upstream. If registering the platform device fails, the lookup table is removed in the error path. On module removal we would try to remove the lookup table again. Fix this by setting priv->lookup only if registering the platform device was successful. In addition free the memory allocated for the lookup table in the error path. Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing") Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Reviewed-by: Linus Walleij Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9a4e9bf304c28..1c970842624ba 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1422,7 +1422,6 @@ static void i801_add_mux(struct i801_priv *priv) lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip, mux_config->gpios[i], "mux", 0); gpiod_add_lookup_table(lookup); - priv->lookup = lookup; /* * Register the mux device, we use PLATFORM_DEVID_NONE here @@ -1436,7 +1435,10 @@ static void i801_add_mux(struct i801_priv *priv) sizeof(struct i2c_mux_gpio_platform_data)); if (IS_ERR(priv->mux_pdev)) { gpiod_remove_lookup_table(lookup); + devm_kfree(dev, lookup); dev_err(dev, "Failed to register i2c-mux-gpio device\n"); + } else { + priv->lookup = lookup; } } -- GitLab From 7a391de927a7322617a9e1c60cc7d224868b4ec9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Feb 2024 15:59:22 -0500 Subject: [PATCH 1358/2290] drm/amd/display: handle range offsets in VRR ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 937844d661354bf142dc1c621396fdab10ecbacc upstream. Need to check the offset bits for values greater than 255. v2: also update amdgpu_dm_connector values. Suggested-by: Mano Ségransan Tested-by: Mano Ségransan Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3203 Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a6c6f286a5988..ff460c9802eb2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10503,14 +10503,23 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (range->flags != 1) continue; - amdgpu_dm_connector->min_vfreq = range->min_vfreq; - amdgpu_dm_connector->max_vfreq = range->max_vfreq; - amdgpu_dm_connector->pixel_clock_mhz = - range->pixel_clock_mhz * 10; - connector->display_info.monitor_range.min_vfreq = range->min_vfreq; connector->display_info.monitor_range.max_vfreq = range->max_vfreq; + if (edid->revision >= 4) { + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ) + connector->display_info.monitor_range.min_vfreq += 255; + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ) + connector->display_info.monitor_range.max_vfreq += 255; + } + + amdgpu_dm_connector->min_vfreq = + connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = + connector->display_info.monitor_range.max_vfreq; + amdgpu_dm_connector->pixel_clock_mhz = + range->pixel_clock_mhz * 10; + break; } -- GitLab From 2149f8a56e2ed345c7a4d022a79f6b8fc53ae926 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 22 Mar 2024 17:03:58 +0200 Subject: [PATCH 1359/2290] x86/efistub: Call mixed mode boot services on the firmware's stack commit cefcd4fe2e3aaf792c14c9e56dab89e3d7a65d02 upstream. Normally, the EFI stub calls into the EFI boot services using the stack that was live when the stub was entered. According to the UEFI spec, this stack needs to be at least 128k in size - this might seem large but all asynchronous processing and event handling in EFI runs from the same stack and so quite a lot of space may be used in practice. In mixed mode, the situation is a bit different: the bootloader calls the 32-bit EFI stub entry point, which calls the decompressor's 32-bit entry point, where the boot stack is set up, using a fixed allocation of 16k. This stack is still in use when the EFI stub is started in 64-bit mode, and so all calls back into the EFI firmware will be using the decompressor's limited boot stack. Due to the placement of the boot stack right after the boot heap, any stack overruns have gone unnoticed. However, commit 5c4feadb0011983b ("x86/decompressor: Move global symbol references to C code") moved the definition of the boot heap into C code, and now the boot stack is placed right at the base of BSS, where any overruns will corrupt the end of the .data section. While it would be possible to work around this by increasing the size of the boot stack, doing so would affect all x86 systems, and mixed mode systems are a tiny (and shrinking) fraction of the x86 installed base. So instead, record the firmware stack pointer value when entering from the 32-bit firmware, and switch to this stack every time a EFI boot service call is made. Cc: # v6.1+ Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 8232c5b2a9bf5..07873f269b7bd 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -49,6 +49,11 @@ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi + + /* Switch to the firmware's stack */ + movl efi32_boot_sp(%rip), %esp + andl $~7, %esp + #ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx @@ -260,6 +265,9 @@ SYM_FUNC_START_LOCAL(efi32_entry) /* Store firmware IDT descriptor */ sidtl (efi32_boot_idt - 1b)(%ebx) + /* Store firmware stack pointer */ + movl %esp, (efi32_boot_sp - 1b)(%ebx) + /* Store boot arguments */ leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) @@ -324,5 +332,6 @@ SYM_DATA_END(efi32_boot_idt) SYM_DATA_LOCAL(efi32_boot_cs, .word 0) SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_sp, .long 0) SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) -- GitLab From cd1bbca03f3c1d845ce274c0d0a66de8e5929f72 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:21 -0800 Subject: [PATCH 1360/2290] net: tls: handle backlogging of crypto requests commit 8590541473188741055d27b955db0777569438e3 upstream. Since we're setting the CRYPTO_TFM_REQ_MAY_BACKLOG flag on our requests to the crypto API, crypto_aead_{encrypt,decrypt} can return -EBUSY instead of -EINPROGRESS in valid situations. For example, when the cryptd queue for AESNI is full (easy to trigger with an artificially low cryptd.cryptd_max_cpu_qlen), requests will be enqueued to the backlog but still processed. In that case, the async callback will also be called twice: first with err == -EINPROGRESS, which it seems we can just ignore, then with err == 0. Compared to Sabrina's original patch this version uses the new tls_*crypt_async_wait() helpers and converts the EBUSY to EINPROGRESS to avoid having to modify all the error handling paths. The handling is identical. Fixes: a54667f6728c ("tls: Add support for encryption using async offload accelerator") Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/netdev/9681d1febfec295449a62300938ed2ae66983f28.1694018970.git.sd@queasysnail.net/ Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller [Srish: v2: fixed hunk failures fixed merge-conflict in stable branch linux-6.1.y, needs to go on top of https://lore.kernel.org/stable/20240307155930.913525-1-lee@kernel.org/] Signed-off-by: Srish Srinivasan Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d53587ff9ddea..e723584fc644b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -195,6 +195,17 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -268,6 +279,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -451,6 +466,9 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct tls_rec *rec; struct sock *sk; + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; + rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -556,6 +574,10 @@ static int tls_do_encryption(struct sock *sk, atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; -- GitLab From 6ee02e35593193d25f49aed7a97f76ae1ff07a3d Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Wed, 13 Mar 2024 09:58:52 +0800 Subject: [PATCH 1361/2290] ASoC: amd: yc: Revert "Fix non-functional mic on Lenovo 21J2" commit 861b3415e4dee06cc00cd1754808a7827b9105bf upstream. This reverts commit ed00a6945dc32462c2d3744a3518d2316da66fcc, which added a quirk entry to enable the Yellow Carp (YC) driver for the Lenovo 21J2 laptop. Although the microphone functioned with the YC driver, it resulted in incorrect driver usage. The Lenovo 21J2 is not a Yellow Carp platform, but a Pink Sardine platform, which already has an upstreamed driver. The microphone on the Lenovo 21J2 operates correctly with the CONFIG_SND_SOC_AMD_PS flag enabled and does not require the quirk entry. So this patch removes the quirk entry. Thanks to Mukunda Vijendar [1] for pointing this out. Link: https://lore.kernel.org/linux-sound/023092e1-689c-4b00-b93f-4092c3724fb6@amd.com/ [1] Signed-off-by: Jiawei Wang Link: https://lore.kernel.org/linux-sound/023092e1-689c-4b00-b93f-4092c3724fb6@amd.com/ [1] Link: https://msgid.link/r/20240313015853.3573242-2-me@jwang.link Signed-off-by: Mark Brown Cc: Luca Stefani Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index e0f406b6646ba..0568e64d10150 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,13 +199,6 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, - { - .driver_data = &acp6x_card, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21J2"), - } - }, { .driver_data = &acp6x_card, .matches = { -- GitLab From 64c3873cb255d23047843ca3a1b9769c07d7ecef Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Mon, 4 Mar 2024 16:40:50 +0530 Subject: [PATCH 1362/2290] iommu: Avoid races around default domain allocations This fix is applicable for LTS kernel, 6.1.y. In latest kernels, this race issue is fixed by the patch series [1] and [2]. The right thing to do here would have been propagating these changes from latest kernel to the stable branch, 6.1.y. However, these changes seems too intrusive to be picked for stable branches. Hence, the fix proposed can be taken as an alternative instead of backporting the patch series. [1] https://lore.kernel.org/all/0-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com/ [2] https://lore.kernel.org/all/0-v5-1b99ae392328+44574-iommu_err_unwind_jgg@nvidia.com/ Issue: A race condition is observed when arm_smmu_device_probe and modprobe of client devices happens in parallel. This results in the allocation of a new default domain for the iommu group even though it was previously allocated and the respective iova domain(iovad) was initialized. However, for this newly allocated default domain, iovad will not be initialized. As a result, for devices requesting dma allocations, this uninitialized iovad will be used, thereby causing NULL pointer dereference issue. Flow: - During arm_smmu_device_probe, bus_iommu_probe() will be called as part of iommu_device_register(). This results in the device probe, __iommu_probe_device(). - When the modprobe of the client device happens in parallel, it sets up the DMA configuration for the device using of_dma_configure_id(), which inturn calls iommu_probe_device(). Later, default domain is allocated and attached using iommu_alloc_default_domain() and __iommu_attach_device() respectively. It then ends up initializing a mapping domain(IOVA domain) and rcaches for the device via arch_setup_dma_ops()->iommu_setup_dma_ops(). - Now, in the bus_iommu_probe() path, it again tries to allocate a default domain via probe_alloc_default_domain(). This results in allocating a new default domain(along with IOVA domain) via __iommu_domain_alloc(). However, this newly allocated IOVA domain will not be initialized. - Now, when the same client device tries dma allocations via iommu_dma_alloc(), it ends up accessing the rcaches of the newly allocated IOVA domain, which is not initialized. This results into NULL pointer dereferencing. Fix this issue by adding a check in probe_alloc_default_domain() to see if the iommu_group already has a default domain allocated and initialized. Cc: # see patch description, fix applicable only for 6.1.y Signed-off-by: Charan Teja Kalla Co-developed-by: Nikhil V Signed-off-by: Nikhil V Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8b38972394776..83736824f17d1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1741,6 +1741,9 @@ static void probe_alloc_default_domain(struct bus_type *bus, { struct __group_domain_type gtype; + if (group->default_domain) + return; + memset(>ype, 0, sizeof(gtype)); /* Ask for default domain requirements of all devices in the group */ -- GitLab From 936381380ae83563d328ab729251d8bc40581fac Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 18 Feb 2024 18:41:37 +0100 Subject: [PATCH 1363/2290] clocksource/drivers/arm_global_timer: Fix maximum prescaler value [ Upstream commit b34b9547cee41575a4fddf390f615570759dc999 ] The prescaler in the "Global Timer Control Register bit assignments" is documented to use bits [15:8], which means that the maximum prescaler register value is 0xff. Fixes: 171b45a4a70e ("clocksource/drivers/arm_global_timer: Implement rate compensation whenever source clock changes") Signed-off-by: Martin Blumenstingl Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240218174138.1942418-2-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/clocksource/arm_global_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 44a61dc6f9320..e1c773bb55359 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -32,7 +32,7 @@ #define GT_CONTROL_IRQ_ENABLE BIT(2) /* banked */ #define GT_CONTROL_AUTO_INC BIT(3) /* banked */ #define GT_CONTROL_PRESCALER_SHIFT 8 -#define GT_CONTROL_PRESCALER_MAX 0xF +#define GT_CONTROL_PRESCALER_MAX 0xFF #define GT_CONTROL_PRESCALER_MASK (GT_CONTROL_PRESCALER_MAX << \ GT_CONTROL_PRESCALER_SHIFT) -- GitLab From 3e5222a17914f663e56c5ffca15586cf0960b4b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20R=C3=B6sti?= Date: Mon, 11 Mar 2024 21:17:04 +0000 Subject: [PATCH 1364/2290] entry: Respect changes to system call number by trace_sys_enter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fb13b11d53875e28e7fbf0c26b288e4ea676aa9f ] When a probe is registered at the trace_sys_enter() tracepoint, and that probe changes the system call number, the old system call still gets executed. This worked correctly until commit b6ec41346103 ("core/entry: Report syscall correctly for trace and audit"), which removed the re-evaluation of the syscall number after the trace point. Restore the original semantics by re-evaluating the system call number after trace_sys_enter(). The performance impact of this re-evaluation is minimal because it only takes place when a trace point is active, and compared to the actual trace point overhead the read from a cache hot variable is negligible. Fixes: b6ec41346103 ("core/entry: Report syscall correctly for trace and audit") Signed-off-by: André Rösti Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240311211704.7262-1-an.roesti@gmail.com Signed-off-by: Sasha Levin --- kernel/entry/common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index be61332c66b54..ccf2b1e1b40be 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -77,8 +77,14 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, /* Either of the above might have changed the syscall number */ syscall = syscall_get_nr(current, regs); - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { trace_sys_enter(regs, syscall); + /* + * Probes or BPF hooks in the tracepoint may have changed the + * system call number as well. + */ + syscall = syscall_get_nr(current, regs); + } syscall_enter_audit(regs, syscall); -- GitLab From 8d8be62a7d5386fe224c5455361da980ce64a96f Mon Sep 17 00:00:00 2001 From: David Laight Date: Mon, 18 Sep 2023 08:16:30 +0000 Subject: [PATCH 1365/2290] minmax: add umin(a, b) and umax(a, b) [ Upstream commit 80fcac55385ccb710d33a20dc1caaef29bd5a921 ] Patch series "minmax: Relax type checks in min() and max()", v4. The min() (etc) functions in minmax.h require that the arguments have exactly the same types. However when the type check fails, rather than look at the types and fix the type of a variable/constant, everyone seems to jump on min_t(). In reality min_t() ought to be rare - when something unusual is being done, not normality. The orginal min() (added in 2.4.9) replaced several inline functions and included the type - so matched the implicit casting of the function call. This was renamed min_t() in 2.4.10 and the current min() added. There is no actual indication that the conversion of negatve values to large unsigned values has ever been an actual problem. A quick grep shows 5734 min() and 4597 min_t(). Having the casts on almost half of the calls shows that something is clearly wrong. If the wrong type is picked (and it is far too easy to pick the type of the result instead of the larger input) then significant bits can get discarded. Pretty much the worst example is in the derived clamp_val(), consider: unsigned char x = 200u; y = clamp_val(x, 10u, 300u); I also suspect that many of the min_t(u16, ...) are actually wrong. For example copy_data() in printk_ringbuffer.c contains: data_size = min_t(u16, buf_size, len); Here buf_size is 'unsigned int' and len 'u16', pass a 64k buffer (can you prove that doesn't happen?) and no data is returned. Apparantly it did - and has since been fixed. The only reason that most of the min_t() are 'fine' is that pretty much all the values in the kernel are between 0 and INT_MAX. Patch 1 adds umin(), this uses integer promotions to convert both arguments to 'unsigned long long'. It can be used to compare a signed type that is known to contain a non-negative value with an unsigned type. The compiler typically optimises it all away. Added first so that it can be referred to in patch 2. Patch 2 replaces the 'same type' check with a 'same signedness' one. This makes min(unsigned_int_var, sizeof()) be ok. The error message is also improved and will contain the expanded form of both arguments (useful for seeing how constants are defined). Patch 3 just fixes some whitespace. Patch 4 allows comparisons of 'unsigned char' and 'unsigned short' to signed types. The integer promotion rules convert them both to 'signed int' prior to the comparison so they can never cause a negative value be converted to a large positive one. Patch 5 (rewritted for v4) allows comparisons of unsigned values against non-negative constant integer expressions. This makes min(unsigned_int_var, 4) be ok. The only common case that is still errored is the comparison of signed values against unsigned constant integer expressions below __INT_MAX__. Typcally min(int_val, sizeof (foo)), the real fix for this is casting the constant: min(int_var, (int)sizeof (foo)). With all the patches applied pretty much all the min_t() could be replaced by min(), and most of the rest by umin(). However they all need careful inspection due to code like: sz = min_t(unsigned char, sz - 1, LIM - 1) + 1; which converts 0 to LIM. This patch (of 6): umin() and umax() can be used when min()/max() errors a signed v unsigned compare when the signed value is known to be non-negative. Unlike min_t(some_unsigned_type, a, b) umin() will never mask off high bits if an inappropriate type is selected. The '+ 0u + 0ul + 0ull' may look strange. The '+ 0u' is needed for 'signed int' on 64bit systems. The '+ 0ul' is needed for 'signed long' on 32bit systems. The '+ 0ull' is needed for 'signed long long'. Link: https://lkml.kernel.org/r/b97faef60ad24922b530241c5d7c933c@AcuMS.aculab.com Link: https://lkml.kernel.org/r/41d93ca827a248698ec64bf57e0c05a5@AcuMS.aculab.com Signed-off-by: David Laight Cc: Andy Shevchenko Cc: Christoph Hellwig Cc: Jason A. Donenfeld Cc: Linus Torvalds Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 51b30ecb73b4 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present") Signed-off-by: Sasha Levin --- include/linux/minmax.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 5433c08fcc685..1aea34b8f19bf 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -51,6 +51,23 @@ */ #define max(x, y) __careful_cmp(x, y, >) +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, <) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, >) + /** * min3 - return minimum of three values * @x: first value -- GitLab From ef80ecc721274c0602719abe822d98ec7e6073fe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Mar 2024 15:28:27 +0000 Subject: [PATCH 1366/2290] swiotlb: Fix alignment checks when both allocation and DMA masks are present [ Upstream commit 51b30ecb73b481d5fac6ccf2ecb4a309c9ee3310 ] Nicolin reports that swiotlb buffer allocations fail for an NVME device behind an IOMMU using 64KiB pages. This is because we end up with a minimum allocation alignment of 64KiB (for the IOMMU to map the buffer safely) but a minimum DMA alignment mask corresponding to a 4KiB NVME page (i.e. preserving the 4KiB page offset from the original allocation). If the original address is not 4KiB-aligned, the allocation will fail because swiotlb_search_pool_area() erroneously compares these unmasked bits with the 64KiB-aligned candidate allocation. Tweak swiotlb_search_pool_area() so that the DMA alignment mask is reduced based on the required alignment of the allocation. Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers") Link: https://lore.kernel.org/r/cover.1707851466.git.nicolinc@nvidia.com Reported-by: Nicolin Chen Signed-off-by: Will Deacon Reviewed-by: Michael Kelley Tested-by: Nicolin Chen Tested-by: Michael Kelley Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/swiotlb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ad6333c3fe1ff..db89ac94e7db4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -654,8 +654,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; unsigned int offset = swiotlb_align_offset(dev, orig_addr); @@ -666,6 +665,14 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, BUG_ON(!nslots); BUG_ON(area_index >= mem->nareas); + /* + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. + */ + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; + /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. For allocations of -- GitLab From d7f62fde4191b912f68ff958fb333237869c2cdb Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 8 Mar 2024 15:28:28 +0000 Subject: [PATCH 1367/2290] iommu/dma: Force swiotlb_max_mapping_size on an untrusted device [ Upstream commit afc5aa46ed560f01ceda897c053c6a40c77ce5c4 ] The swiotlb does not support a mapping size > swiotlb_max_mapping_size(). On the other hand, with a 64KB PAGE_SIZE configuration, it's observed that an NVME device can map a size between 300KB~512KB, which certainly failed the swiotlb mappings, though the default pool of swiotlb has many slots: systemd[1]: Started Journal Service. => nvme 0000:00:01.0: swiotlb buffer is full (sz: 327680 bytes), total 32768 (slots), used 32 (slots) note: journal-offline[392] exited with irqs disabled note: journal-offline[392] exited with preempt_count 1 Call trace: [ 3.099918] swiotlb_tbl_map_single+0x214/0x240 [ 3.099921] iommu_dma_map_page+0x218/0x328 [ 3.099928] dma_map_page_attrs+0x2e8/0x3a0 [ 3.101985] nvme_prep_rq.part.0+0x408/0x878 [nvme] [ 3.102308] nvme_queue_rqs+0xc0/0x300 [nvme] [ 3.102313] blk_mq_flush_plug_list.part.0+0x57c/0x600 [ 3.102321] blk_add_rq_to_plug+0x180/0x2a0 [ 3.102323] blk_mq_submit_bio+0x4c8/0x6b8 [ 3.103463] __submit_bio+0x44/0x220 [ 3.103468] submit_bio_noacct_nocheck+0x2b8/0x360 [ 3.103470] submit_bio_noacct+0x180/0x6c8 [ 3.103471] submit_bio+0x34/0x130 [ 3.103473] ext4_bio_write_folio+0x5a4/0x8c8 [ 3.104766] mpage_submit_folio+0xa0/0x100 [ 3.104769] mpage_map_and_submit_buffers+0x1a4/0x400 [ 3.104771] ext4_do_writepages+0x6a0/0xd78 [ 3.105615] ext4_writepages+0x80/0x118 [ 3.105616] do_writepages+0x90/0x1e8 [ 3.105619] filemap_fdatawrite_wbc+0x94/0xe0 [ 3.105622] __filemap_fdatawrite_range+0x68/0xb8 [ 3.106656] file_write_and_wait_range+0x84/0x120 [ 3.106658] ext4_sync_file+0x7c/0x4c0 [ 3.106660] vfs_fsync_range+0x3c/0xa8 [ 3.106663] do_fsync+0x44/0xc0 Since untrusted devices might go down the swiotlb pathway with dma-iommu, these devices should not map a size larger than swiotlb_max_mapping_size. To fix this bug, add iommu_dma_max_mapping_size() for untrusted devices to take into account swiotlb_max_mapping_size() v.s. iova_rcache_range() from the iommu_dma_opt_mapping_size(). Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers") Link: https://lore.kernel.org/r/ee51a3a5c32cf885b18f6416171802669f4a718a.1707851466.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen [will: Drop redundant is_swiotlb_active(dev) check] Signed-off-by: Will Deacon Reviewed-by: Michael Kelley Acked-by: Robin Murphy Tested-by: Nicolin Chen Tested-by: Michael Kelley Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/iommu/dma-iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index aa6d62cc567ae..3fa66dba0a326 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1547,6 +1547,14 @@ static size_t iommu_dma_opt_mapping_size(void) return iova_rcache_range(); } +static size_t iommu_dma_max_mapping_size(struct device *dev) +{ + if (dev_is_untrusted(dev)) + return swiotlb_max_mapping_size(dev); + + return SIZE_MAX; +} + static const struct dma_map_ops iommu_dma_ops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, @@ -1569,6 +1577,7 @@ static const struct dma_map_ops iommu_dma_ops = { .unmap_resource = iommu_dma_unmap_resource, .get_merge_boundary = iommu_dma_get_merge_boundary, .opt_mapping_size = iommu_dma_opt_mapping_size, + .max_mapping_size = iommu_dma_max_mapping_size, }; /* -- GitLab From 2f67c82ed04ee43241b3cc94516fcad9a4c21cf4 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 26 Feb 2024 13:07:24 +0106 Subject: [PATCH 1368/2290] printk: Update @console_may_schedule in console_trylock_spinning() [ Upstream commit 8076972468584d4a21dab9aa50e388b3ea9ad8c7 ] console_trylock_spinning() may takeover the console lock from a schedulable context. Update @console_may_schedule to make sure it reflects a trylock acquire. Reported-by: Mukesh Ojha Closes: https://lore.kernel.org/lkml/20240222090538.23017-1-quic_mojha@quicinc.com Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") Signed-off-by: John Ogness Reviewed-by: Mukesh Ojha Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/875xybmo2z.fsf@jogness.linutronix.de Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c55ee859dbd08..0ae06d5046bb0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1947,6 +1947,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } -- GitLab From 0250a68a35c95843174c1403882b3dc12c98a28c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:16 +0200 Subject: [PATCH 1369/2290] irqchip/renesas-rzg2l: Implement restriction when writing ISCR register [ Upstream commit ef88eefb1a81a8701eabb7d5ced761a66a465a49 ] The RZ/G2L manual (chapter "IRQ Status Control Register (ISCR)") describes the operation to clear interrupts through the ISCR register as follows: [Write operation] When "Falling-edge detection", "Rising-edge detection" or "Falling/Rising-edge detection" is set in IITSR: - In case ISTAT is 1 0: IRQn interrupt detection status is cleared. 1: Invalid to write. - In case ISTAT is 0 Invalid to write. When "Low-level detection" is set in IITSR.: Invalid to write. Take the interrupt type into account when clearing interrupts through the ISCR register to avoid writing the ISCR when the interrupt type is level. Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20231120111820.87398-6-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: 9eec61df55c5 ("irqchip/renesas-rzg2l: Flush posted write in irq_eoi()") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 10c3e85c90c23..fbd1766f6aaa5 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -72,11 +72,17 @@ static void rzg2l_irq_eoi(struct irq_data *d) unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); u32 bit = BIT(hw_irq); - u32 reg; + u32 iitsr, iscr; - reg = readl_relaxed(priv->base + ISCR); - if (reg & bit) - writel_relaxed(reg & ~bit, priv->base + ISCR); + iscr = readl_relaxed(priv->base + ISCR); + iitsr = readl_relaxed(priv->base + IITSR); + + /* + * ISCR can only be cleared if the type is falling-edge, rising-edge or + * falling/rising-edge. + */ + if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) + writel_relaxed(iscr & ~bit, priv->base + ISCR); } static void rzg2l_tint_eoi(struct irq_data *d) -- GitLab From 7775e87f503a9867b22c3b04392075b0262e1544 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:18 +0000 Subject: [PATCH 1370/2290] irqchip/renesas-rzg2l: Flush posted write in irq_eoi() [ Upstream commit 9eec61df55c51415409c7cc47e9a1c8de94a0522 ] The irq_eoi() callback of the RZ/G2L interrupt chip clears the relevant interrupt cause bit in the TSCR register by writing to it. This write is not sufficient because the write is posted and therefore not guaranteed to immediately clear the bit. Due to that delay the CPU can raise the just handled interrupt again. Prevent this by reading the register back which causes the posted write to be flushed to the hardware before the read completes. Fixes: 3fed09559cd8 ("irqchip: Add RZ/G2L IA55 Interrupt Controller driver") Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index fbd1766f6aaa5..454af6faf42bc 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -81,8 +81,14 @@ static void rzg2l_irq_eoi(struct irq_data *d) * ISCR can only be cleared if the type is falling-edge, rising-edge or * falling/rising-edge. */ - if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) + if ((iscr & bit) && (iitsr & IITSR_IITSEL_MASK(hw_irq))) { writel_relaxed(iscr & ~bit, priv->base + ISCR); + /* + * Enforce that the posted write is flushed to prevent that the + * just handled interrupt is raised again. + */ + readl_relaxed(priv->base + ISCR); + } } static void rzg2l_tint_eoi(struct irq_data *d) @@ -93,8 +99,14 @@ static void rzg2l_tint_eoi(struct irq_data *d) u32 reg; reg = readl_relaxed(priv->base + TSCR); - if (reg & bit) + if (reg & bit) { writel_relaxed(reg & ~bit, priv->base + TSCR); + /* + * Enforce that the posted write is flushed to prevent that the + * just handled interrupt is raised again. + */ + readl_relaxed(priv->base + TSCR); + } } static void rzg2l_irqc_eoi(struct irq_data *d) -- GitLab From 5dc2cb3eeeeeefca9e8c28115cb732b6b4d446a7 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:17 +0200 Subject: [PATCH 1371/2290] irqchip/renesas-rzg2l: Add macro to retrieve TITSR register offset based on register's index [ Upstream commit 2eca4731cc66563b3919d8753dbd74d18c39f662 ] There are 2 TITSR registers available on the IA55 interrupt controller. Add a macro that retrieves the TITSR register offset based on it's index. This macro is useful in when adding suspend/resume support so both TITSR registers can be accessed in a for loop. Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231120111820.87398-7-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: 853a6030303f ("irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 454af6faf42bc..a74391615ab38 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -28,8 +28,7 @@ #define ISCR 0x10 #define IITSR 0x14 #define TSCR 0x20 -#define TITSR0 0x24 -#define TITSR1 0x28 +#define TITSR(n) (0x24 + (n) * 4) #define TITSR0_MAX_INT 16 #define TITSEL_WIDTH 0x2 #define TSSR(n) (0x30 + ((n) * 4)) @@ -206,8 +205,7 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); unsigned int hwirq = irqd_to_hwirq(d); u32 titseln = hwirq - IRQC_TINT_START; - u32 offset; - u8 sense; + u8 index, sense; u32 reg; switch (type & IRQ_TYPE_SENSE_MASK) { @@ -223,17 +221,17 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) return -EINVAL; } - offset = TITSR0; + index = 0; if (titseln >= TITSR0_MAX_INT) { titseln -= TITSR0_MAX_INT; - offset = TITSR1; + index = 1; } raw_spin_lock(&priv->lock); - reg = readl_relaxed(priv->base + offset); + reg = readl_relaxed(priv->base + TITSR(index)); reg &= ~(IRQ_MASK << (titseln * TITSEL_WIDTH)); reg |= sense << (titseln * TITSEL_WIDTH); - writel_relaxed(reg, priv->base + offset); + writel_relaxed(reg, priv->base + TITSR(index)); raw_spin_unlock(&priv->lock); return 0; -- GitLab From 551ad70e86621587cd7cec4351d55a37f0eb50d4 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:19 +0000 Subject: [PATCH 1372/2290] irqchip/renesas-rzg2l: Rename rzg2l_tint_eoi() [ Upstream commit 7cb6362c63df233172eaecddaf9ce2ce2f769112 ] Rename rzg2l_tint_eoi()->rzg2l_clear_tint_int() and simplify the code by removing redundant priv and hw_irq local variables. Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Stable-dep-of: 853a6030303f ("irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index a74391615ab38..ced5064eb0e9d 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -90,11 +90,9 @@ static void rzg2l_irq_eoi(struct irq_data *d) } } -static void rzg2l_tint_eoi(struct irq_data *d) +static void rzg2l_clear_tint_int(struct rzg2l_irqc_priv *priv, unsigned int hwirq) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_TINT_START; - struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); - u32 bit = BIT(hw_irq); + u32 bit = BIT(hwirq - IRQC_TINT_START); u32 reg; reg = readl_relaxed(priv->base + TSCR); @@ -117,7 +115,7 @@ static void rzg2l_irqc_eoi(struct irq_data *d) if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT) rzg2l_irq_eoi(d); else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) - rzg2l_tint_eoi(d); + rzg2l_clear_tint_int(priv, hw_irq); raw_spin_unlock(&priv->lock); irq_chip_eoi_parent(d); } -- GitLab From 573777eb56071631b5310348898ad3a1f486e8e0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:20 +0000 Subject: [PATCH 1373/2290] irqchip/renesas-rzg2l: Rename rzg2l_irq_eoi() [ Upstream commit b4b5cd61a6fdd92ede0dc39f0850a182affd1323 ] Rename rzg2l_irq_eoi()->rzg2l_clear_irq_int() and simplify the code by removing redundant priv local variable. Suggested-by: Geert Uytterhoeven Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Stable-dep-of: 853a6030303f ("irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index ced5064eb0e9d..aa27335ab2eec 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -66,10 +66,9 @@ static struct rzg2l_irqc_priv *irq_data_to_priv(struct irq_data *data) return data->domain->host_data; } -static void rzg2l_irq_eoi(struct irq_data *d) +static void rzg2l_clear_irq_int(struct rzg2l_irqc_priv *priv, unsigned int hwirq) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; - struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); + unsigned int hw_irq = hwirq - IRQC_IRQ_START; u32 bit = BIT(hw_irq); u32 iitsr, iscr; @@ -113,7 +112,7 @@ static void rzg2l_irqc_eoi(struct irq_data *d) raw_spin_lock(&priv->lock); if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT) - rzg2l_irq_eoi(d); + rzg2l_clear_irq_int(priv, hw_irq); else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) rzg2l_clear_tint_int(priv, hw_irq); raw_spin_unlock(&priv->lock); -- GitLab From dc33e92af306ab8407aab843c85d30384b37054b Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 5 Mar 2024 18:39:21 +0000 Subject: [PATCH 1374/2290] irqchip/renesas-rzg2l: Prevent spurious interrupts when setting trigger type [ Upstream commit 853a6030303f8a8fa54929b68e5665d9b21aa405 ] RZ/G2L interrupt chips require that the interrupt is masked before changing the NMI, IRQ, TINT interrupt settings. Aside of that, after setting an edge trigger type it is required to clear the interrupt status register in order to avoid spurious interrupts. The current implementation fails to do either of that and therefore is prone to generate spurious interrupts when setting the trigger type. Address this by: - Ensuring that the interrupt is masked at the chip level across the update for the TINT chip - Clearing the interrupt status register after updating the trigger mode for edge type interrupts [ tglx: Massaged changelog and reverted the spin_lock_irqsave() change as the set_type() callback is always called with interrupts disabled. ] Fixes: 3fed09559cd8 ("irqchip: Add RZ/G2L IA55 Interrupt Controller driver") Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 36 +++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index aa27335ab2eec..be71459c7465a 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -162,8 +162,10 @@ static void rzg2l_irqc_irq_enable(struct irq_data *d) static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) { - unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START; struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); + unsigned int hwirq = irqd_to_hwirq(d); + u32 iitseln = hwirq - IRQC_IRQ_START; + bool clear_irq_int = false; u16 sense, tmp; switch (type & IRQ_TYPE_SENSE_MASK) { @@ -173,14 +175,17 @@ static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) case IRQ_TYPE_EDGE_FALLING: sense = IITSR_IITSEL_EDGE_FALLING; + clear_irq_int = true; break; case IRQ_TYPE_EDGE_RISING: sense = IITSR_IITSEL_EDGE_RISING; + clear_irq_int = true; break; case IRQ_TYPE_EDGE_BOTH: sense = IITSR_IITSEL_EDGE_BOTH; + clear_irq_int = true; break; default: @@ -189,21 +194,40 @@ static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type) raw_spin_lock(&priv->lock); tmp = readl_relaxed(priv->base + IITSR); - tmp &= ~IITSR_IITSEL_MASK(hw_irq); - tmp |= IITSR_IITSEL(hw_irq, sense); + tmp &= ~IITSR_IITSEL_MASK(iitseln); + tmp |= IITSR_IITSEL(iitseln, sense); + if (clear_irq_int) + rzg2l_clear_irq_int(priv, hwirq); writel_relaxed(tmp, priv->base + IITSR); raw_spin_unlock(&priv->lock); return 0; } +static u32 rzg2l_disable_tint_and_set_tint_source(struct irq_data *d, struct rzg2l_irqc_priv *priv, + u32 reg, u32 tssr_offset, u8 tssr_index) +{ + u32 tint = (u32)(uintptr_t)irq_data_get_irq_chip_data(d); + u32 tien = reg & (TIEN << TSSEL_SHIFT(tssr_offset)); + + /* Clear the relevant byte in reg */ + reg &= ~(TSSEL_MASK << TSSEL_SHIFT(tssr_offset)); + /* Set TINT and leave TIEN clear */ + reg |= tint << TSSEL_SHIFT(tssr_offset); + writel_relaxed(reg, priv->base + TSSR(tssr_index)); + + return reg | tien; +} + static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) { struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); unsigned int hwirq = irqd_to_hwirq(d); u32 titseln = hwirq - IRQC_TINT_START; + u32 tssr_offset = TSSR_OFFSET(titseln); + u8 tssr_index = TSSR_INDEX(titseln); u8 index, sense; - u32 reg; + u32 reg, tssr; switch (type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_RISING: @@ -225,10 +249,14 @@ static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type) } raw_spin_lock(&priv->lock); + tssr = readl_relaxed(priv->base + TSSR(tssr_index)); + tssr = rzg2l_disable_tint_and_set_tint_source(d, priv, tssr, tssr_offset, tssr_index); reg = readl_relaxed(priv->base + TITSR(index)); reg &= ~(IRQ_MASK << (titseln * TITSEL_WIDTH)); reg |= sense << (titseln * TITSEL_WIDTH); writel_relaxed(reg, priv->base + TITSR(index)); + rzg2l_clear_tint_int(priv, hwirq); + writel_relaxed(tssr, priv->base + TSSR(tssr_index)); raw_spin_unlock(&priv->lock); return 0; -- GitLab From 6417684315087904fffe8966d27ca74398c57dd6 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 15 Mar 2024 00:17:30 +0900 Subject: [PATCH 1375/2290] kprobes/x86: Use copy_from_kernel_nofault() to read from unsafe address [ Upstream commit 4e51653d5d871f40f1bd5cf95cc7f2d8b33d063b ] Read from an unsafe address with copy_from_kernel_nofault() in arch_adjust_kprobe_addr() because this function is used before checking the address is in text or not. Syzcaller bot found a bug and reported the case if user specifies inaccessible data area, arch_adjust_kprobe_addr() will cause a kernel panic. [ mingo: Clarified the comment. ] Fixes: cc66bb914578 ("x86/ibt,kprobes: Cure sym+0 equals fentry woes") Reported-by: Qiang Zhang Tested-by: Jinghao Jia Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/171042945004.154897.2221804961882915806.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/x86/kernel/kprobes/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6120f25b0d5cc..991f00c817e6c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -301,7 +301,16 @@ static int can_probe(unsigned long paddr) kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + u32 insn; + + /* + * Since 'addr' is not guaranteed to be safe to access, use + * copy_from_kernel_nofault() to read the instruction: + */ + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) + return NULL; + + if (is_endbr(insn)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; -- GitLab From 493ed133b4e444f4cc31f34e44264fc2a042a595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?KONDO=20KAZUMA=28=E8=BF=91=E8=97=A4=E3=80=80=E5=92=8C?= =?UTF-8?q?=E7=9C=9F=29?= Date: Fri, 22 Mar 2024 10:47:02 +0000 Subject: [PATCH 1376/2290] efi/libstub: fix efi_random_alloc() to allocate memory at alloc_min or higher address [ Upstream commit 3cb4a4827596abc82e55b80364f509d0fefc3051 ] Following warning is sometimes observed while booting my servers: [ 3.594838] DMA: preallocated 4096 KiB GFP_KERNEL pool for atomic allocations [ 3.602918] swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0-1 ... [ 3.851862] DMA: preallocated 1024 KiB GFP_KERNEL|GFP_DMA pool for atomic allocation If 'nokaslr' boot option is set, the warning always happens. On x86, ZONE_DMA is small zone at the first 16MB of physical address space. When this problem happens, most of that space seems to be used by decompressed kernel. Thereby, there is not enough space at DMA_ZONE to meet the request of DMA pool allocation. The commit 2f77465b05b1 ("x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR") tried to fix this problem by introducing lower bound of allocation. But the fix is not complete. efi_random_alloc() allocates pages by following steps. 1. Count total available slots ('total_slots') 2. Select a slot ('target_slot') to allocate randomly 3. Calculate a starting address ('target') to be included target_slot 4. Allocate pages, which starting address is 'target' In step 1, 'alloc_min' is used to offset the starting address of memory chunk. But in step 3 'alloc_min' is not considered at all. As the result, 'target' can be miscalculated and become lower than 'alloc_min'. When KASLR is disabled, 'target_slot' is always 0 and the problem happens everytime if the EFI memory map of the system meets the condition. Fix this problem by calculating 'target' considering 'alloc_min'. Cc: linux-efi@vger.kernel.org Cc: Tom Englund Cc: linux-kernel@vger.kernel.org Fixes: 2f77465b05b1 ("x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR") Signed-off-by: Kazuma Kondo Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/randomalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 7ba05719a53ba..0d7b11b55ff31 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -119,7 +119,7 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(md->phys_addr, align) + target_slot * align; + target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, -- GitLab From 21c7c00dae55cb0e3810d5f9506b58f68475d41d Mon Sep 17 00:00:00 2001 From: Adamos Ttofari Date: Fri, 22 Mar 2024 16:04:39 -0700 Subject: [PATCH 1377/2290] x86/fpu: Keep xfd_state in sync with MSR_IA32_XFD [ Upstream commit 10e4b5166df9ff7a2d5316138ca668b42d004422 ] Commit 672365477ae8 ("x86/fpu: Update XFD state where required") and commit 8bf26758ca96 ("x86/fpu: Add XFD state to fpstate") introduced a per CPU variable xfd_state to keep the MSR_IA32_XFD value cached, in order to avoid unnecessary writes to the MSR. On CPU hotplug MSR_IA32_XFD is reset to the init_fpstate.xfd, which wipes out any stale state. But the per CPU cached xfd value is not reset, which brings them out of sync. As a consequence a subsequent xfd_update_state() might fail to update the MSR which in turn can result in XRSTOR raising a #NM in kernel space, which crashes the kernel. To fix this, introduce xfd_set_state() to write xfd_state together with MSR_IA32_XFD, and use it in all places that set MSR_IA32_XFD. Fixes: 672365477ae8 ("x86/fpu: Update XFD state where required") Signed-off-by: Adamos Ttofari Signed-off-by: Chang S. Bae Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240322230439.456571-1-chang.seok.bae@intel.com Closes: https://lore.kernel.org/lkml/20230511152818.13839-1-attofari@amazon.de Signed-off-by: Sasha Levin --- arch/x86/kernel/fpu/xstate.c | 5 +++-- arch/x86/kernel/fpu/xstate.h | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ebe698f8af73b..2aa849705bb68 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -177,10 +177,11 @@ void fpu__init_cpu_xstate(void) * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures - * that any stale state is wiped out from XFD. + * that any stale state is wiped out from XFD. Reset the per CPU + * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) - wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3518fb26d06b0..19ca623ffa2ac 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #endif #ifdef CONFIG_X86_64 +static inline void xfd_set_state(u64 xfd) +{ + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); +} + static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; - if (__this_cpu_read(xfd_state) != xfd) { - wrmsrl(MSR_IA32_XFD, xfd); - __this_cpu_write(xfd_state, xfd); - } + if (__this_cpu_read(xfd_state) != xfd) + xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else +static inline void xfd_set_state(u64 xfd) { } + static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { -- GitLab From b9d103aca85f082a343b222493f3cab1219aaaf4 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Sat, 23 Mar 2024 06:33:33 +0000 Subject: [PATCH 1378/2290] efi: fix panic in kdump kernel [ Upstream commit 62b71cd73d41ddac6b1760402bbe8c4932e23531 ] Check if get_next_variable() is actually valid pointer before calling it. In kdump kernel this method is set to NULL that causes panic during the kexec-ed kernel boot. Tested with QEMU and OVMF firmware. Fixes: bad267f9e18f ("efi: verify that variable services are supported") Signed-off-by: Oleksandr Tymoshenko Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9077353d1c98d..28d4defc5d0cd 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -194,6 +194,8 @@ static bool generic_ops_supported(void) name_size = sizeof(name); + if (!efi.get_next_variable) + return false; status = efi.get_next_variable(&name_size, &name, &guid); if (status == EFI_UNSUPPORTED) return false; -- GitLab From 44b6fb6cdedb2c391a2da355521d4610b2645fcc Mon Sep 17 00:00:00 2001 From: Zoltan HERPAI Date: Wed, 20 Mar 2024 09:36:02 +0100 Subject: [PATCH 1379/2290] pwm: img: fix pwm clock lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9eb05877dbee03064d3d3483cd6702f610d5a358 ] 22e8e19 has introduced a regression in the imgchip->pwm_clk lookup, whereas the clock name has also been renamed to "imgchip". This causes the driver failing to load: [ 0.546905] img-pwm 18101300.pwm: failed to get imgchip clock [ 0.553418] img-pwm: probe of 18101300.pwm failed with error -2 Fix this lookup by reverting the clock name back to "pwm". Signed-off-by: Zoltan HERPAI Link: https://lore.kernel.org/r/20240320083602.81592-1-wigyori@uid0.hu Fixes: 22e8e19a46f7 ("pwm: img: Rename variable pointing to driver private data") Signed-off-by: Uwe Kleine-König Signed-off-by: Sasha Levin --- drivers/pwm/pwm-img.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 0fccf061ab958..8ce6c453adf07 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -289,9 +289,9 @@ static int img_pwm_probe(struct platform_device *pdev) return PTR_ERR(imgchip->sys_clk); } - imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip"); + imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm"); if (IS_ERR(imgchip->pwm_clk)) { - dev_err(&pdev->dev, "failed to get imgchip clock\n"); + dev_err(&pdev->dev, "failed to get pwm clock\n"); return PTR_ERR(imgchip->pwm_clk); } -- GitLab From 4f2debd9c6801065196960d2967de4668ed025a8 Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Wed, 21 Feb 2024 12:53:04 +0100 Subject: [PATCH 1380/2290] tty: serial: imx: Fix broken RS485 commit 672448ccf9b6a676f96f9352cbf91f4d35f4084a upstream. When about to transmit the function imx_uart_start_tx is called and in some RS485 configurations this function will call imx_uart_stop_rx. The problem is that imx_uart_stop_rx will enable loopback in order to release the RS485 bus, but when loopback is enabled transmitted data will just be looped to RX. This patch fixes the above problem by not enabling loopback when about to transmit. This driver now works well when used for RS485 half duplex master configurations. Fixes: 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high") Cc: stable Signed-off-by: Rickard x Andersson Tested-by: Christoph Niedermaier Link: https://lore.kernel.org/r/20240221115304.509811-1-rickaran@axis.com Signed-off-by: Christoph Niedermaier Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index f8962a3d44216..573bf7e9b7978 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -496,8 +496,7 @@ static void imx_uart_stop_tx(struct uart_port *port) } } -/* called with port.lock taken and irqs off */ -static void imx_uart_stop_rx(struct uart_port *port) +static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback) { struct imx_port *sport = (struct imx_port *)port; u32 ucr1, ucr2, ucr4, uts; @@ -519,7 +518,7 @@ static void imx_uart_stop_rx(struct uart_port *port) /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ if (port->rs485.flags & SER_RS485_ENABLED && port->rs485.flags & SER_RS485_RTS_ON_SEND && - sport->have_rtscts && !sport->have_rtsgpio) { + sport->have_rtscts && !sport->have_rtsgpio && loopback) { uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); uts |= UTS_LOOP; imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); @@ -531,6 +530,16 @@ static void imx_uart_stop_rx(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } +/* called with port.lock taken and irqs off */ +static void imx_uart_stop_rx(struct uart_port *port) +{ + /* + * Stop RX and enable loopback in order to make sure RS485 bus + * is not blocked. Se comment in imx_uart_probe(). + */ + imx_uart_stop_rx_with_loopback_ctrl(port, true); +} + /* called with port.lock taken and irqs off */ static void imx_uart_enable_ms(struct uart_port *port) { @@ -719,8 +728,13 @@ static void imx_uart_start_tx(struct uart_port *port) imx_uart_rts_inactive(sport, &ucr2); imx_uart_writel(sport, ucr2, UCR2); + /* + * Since we are about to transmit we can not stop RX + * with loopback enabled because that will make our + * transmitted data being just looped to RX. + */ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - imx_uart_stop_rx(port); + imx_uart_stop_rx_with_loopback_ctrl(port, false); sport->tx_state = WAIT_AFTER_RTS; -- GitLab From 242006996d15f5ca62e22f8c7de077d9c4a8f367 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 29 Feb 2024 13:08:09 -0500 Subject: [PATCH 1381/2290] block: Fix page refcounts for unaligned buffers in __bio_release_pages() commit 38b43539d64b2fa020b3b9a752a986769f87f7a6 upstream. Fix an incorrect number of pages being released for buffers that do not start at the beginning of a page. Fixes: 1b151e2435fc ("block: Remove special-casing of compound pages") Cc: stable@vger.kernel.org Signed-off-by: Tony Battersby Tested-by: Greg Edwards Link: https://lore.kernel.org/r/86e592a9-98d4-4cff-a646-0c0084328356@cybernetics.com Signed-off-by: Jens Axboe [ Tony: backport to v6.1 by replacing bio_release_page() loop with folio_put_refs() as commits fd363244e883 and e4cc64657bec are not present. ] Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/block/bio.c b/block/bio.c index 74c2818c7ec99..3318e0022fdfd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1112,19 +1112,16 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) struct folio_iter fi; bio_for_each_folio_all(fi, bio) { - struct page *page; - size_t done = 0; + size_t nr_pages; if (mark_dirty) { folio_lock(fi.folio); folio_mark_dirty(fi.folio); folio_unlock(fi.folio); } - page = folio_page(fi.folio, fi.offset / PAGE_SIZE); - do { - folio_put(fi.folio); - done += PAGE_SIZE; - } while (done < fi.length); + nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE - + fi.offset / PAGE_SIZE + 1; + folio_put_refs(fi.folio, nr_pages); } } EXPORT_SYMBOL_GPL(__bio_release_pages); -- GitLab From 052e4c8987ee9bef8994cf9c9166e2c6759e0752 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 13 Aug 2023 23:23:25 +0800 Subject: [PATCH 1382/2290] blk-mq: release scheduler resource when request completes commit e5c0ca13659e9d18f53368d651ed7e6e433ec1cf upstream. Chuck reported [1] an IO hang problem on NFS exports that reside on SATA devices and bisected to commit 615939a2ae73 ("blk-mq: defer to the normal submission path for post-flush requests"). We analysed the IO hang problem, found there are two postflush requests waiting for each other. The first postflush request completed the REQ_FSEQ_DATA sequence, so go to the REQ_FSEQ_POSTFLUSH sequence and added in the flush pending list, but failed to blk_kick_flush() because of the second postflush request which is inflight waiting in scheduler queue. The second postflush waiting in scheduler queue can't be dispatched because the first postflush hasn't released scheduler resource even though it has completed by itself. Fix it by releasing scheduler resource when the first postflush request completed, so the second postflush can be dispatched and completed, then make blk_kick_flush() succeed. While at it, remove the check for e->ops.finish_request, as all schedulers set that. Reaffirm this requirement by adding a WARN_ON_ONCE() at scheduler registration time, just like we do for insert_requests and dispatch_request. [1] https://lore.kernel.org/all/7A57C7AE-A51A-4254-888B-FE15CA21F9E9@oracle.com/ Link: https://lore.kernel.org/linux-block/20230819031206.2744005-1-chengming.zhou@linux.dev/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202308172100.8ce4b853-oliver.sang@intel.com Fixes: 615939a2ae73 ("blk-mq: defer to the normal submission path for post-flush requests") Reported-by: Chuck Lever Signed-off-by: Chengming Zhou Tested-by: Chuck Lever Link: https://lore.kernel.org/r/20230813152325.3017343-1-chengming.zhou@linux.dev [axboe: folded in incremental fix and added tags] Signed-off-by: Jens Axboe [bvanassche: changed RQF_USE_SCHED into RQF_ELVPRIV; restored the finish_request pointer check before calling finish_request and removed the new warning from the elevator code. This patch fixes an I/O hang when submitting a REQ_FUA request to a request queue for a zoned block device for which FUA has been disabled (QUEUE_FLAG_FUA is not set).] Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 7ed6b9469f979..07610505c1776 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -675,6 +675,22 @@ out_queue_exit: } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); +static void blk_mq_finish_request(struct request *rq) +{ + struct request_queue *q = rq->q; + + if ((rq->rq_flags & RQF_ELVPRIV) && + q->elevator->type->ops.finish_request) { + q->elevator->type->ops.finish_request(rq); + /* + * For postflush request that may need to be + * completed twice, we should clear this flag + * to avoid double finish_request() on the rq. + */ + rq->rq_flags &= ~RQF_ELVPRIV; + } +} + static void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; @@ -701,9 +717,7 @@ void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - if ((rq->rq_flags & RQF_ELVPRIV) && - q->elevator->type->ops.finish_request) - q->elevator->type->ops.finish_request(rq); + blk_mq_finish_request(rq); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); @@ -1025,6 +1039,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) if (blk_mq_need_time_stamp(rq)) __blk_mq_end_request_acct(rq, ktime_get_ns()); + blk_mq_finish_request(rq); + if (rq->end_io) { rq_qos_done(rq->q, rq); if (rq->end_io(rq, error) == RQ_END_IO_FREE) @@ -1079,6 +1095,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) if (iob->need_ts) __blk_mq_end_request_acct(rq, now); + blk_mq_finish_request(rq); + rq_qos_done(rq->q, rq); /* -- GitLab From 6d87a9be22151997984a96e32b769b01de158c6f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 18:11:22 +0100 Subject: [PATCH 1383/2290] selftests: mptcp: diag: return KSFT_FAIL not test_cnt commit 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 upstream. The test counter 'test_cnt' should not be returned in diag.sh, e.g. what if only the 4th test fail? Will do 'exit 4' which is 'exit ${KSFT_SKIP}', the whole test will be marked as skipped instead of 'failed'! So we should do ret=${KSFT_FAIL} instead. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Cc: stable@vger.kernel.org Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 400cf1ce96e31..3df4a8103c76f 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() echo "[ skip ] Feature probably not supported" else echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} fi else echo "[ ok ]" @@ -100,10 +100,10 @@ wait_msk_nr() printf "%-50s" "$msg" if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" - ret=$test_cnt + ret=${KSFT_FAIL} elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" fi -- GitLab From 139dfcc4d723ab13469881200c7d80f49d776060 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:50 -0600 Subject: [PATCH 1384/2290] vfio/pci: Disable auto-enable of exclusive INTx IRQ [ Upstream commit fe9a7082684eb059b925c535682e68c34d487d43 ] Currently for devices requiring masking at the irqchip for INTx, ie. devices without DisINTx support, the IRQ is enabled in request_irq() and subsequently disabled as necessary to align with the masked status flag. This presents a window where the interrupt could fire between these events, resulting in the IRQ incrementing the disable depth twice. This would be unrecoverable for a user since the masked flag prevents nested enables through vfio. Instead, invert the logic using IRQF_NO_AUTOEN such that exclusive INTx is never auto-enabled, then unmask as required. Cc: Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_intrs.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 8c8b04d858454..4f0699215f12b 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -251,8 +251,15 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) vdev->ctx[0].trigger = trigger; + /* + * Devices without DisINTx support require an exclusive interrupt, + * IRQ masking is performed at the IRQ chip. The masked status is + * protected by vdev->irqlock. Setup the IRQ without auto-enable and + * unmask as necessary below under lock. DisINTx is unmodified by + * the IRQ configuration and may therefore use auto-enable. + */ if (!vdev->pci_2_3) - irqflags = 0; + irqflags = IRQF_NO_AUTOEN; ret = request_irq(pdev->irq, vfio_intx_handler, irqflags, vdev->ctx[0].name, vdev); @@ -263,13 +270,9 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) return ret; } - /* - * INTx disable will stick across the new irq setup, - * disable_irq won't. - */ spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && vdev->ctx[0].masked) - disable_irq_nosync(pdev->irq); + if (!vdev->pci_2_3 && !vdev->ctx[0].masked) + enable_irq(pdev->irq); spin_unlock_irqrestore(&vdev->irqlock, flags); return 0; -- GitLab From 33dc33f560017ffdf7a3db03b963f407082aab16 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:51 -0600 Subject: [PATCH 1385/2290] vfio: Introduce interface to flush virqfd inject workqueue [ Upstream commit b620ecbd17a03cacd06f014a5d3f3a11285ce053 ] In order to synchronize changes that can affect the thread callback, introduce an interface to force a flush of the inject workqueue. The irqfd pointer is only valid under spinlock, but the workqueue cannot be flushed under spinlock. Therefore the flush work for the irqfd is queued under spinlock. The vfio_irqfd_cleanup_wq workqueue is re-used for queuing this work such that flushing the workqueue is also ordered relative to shutdown. Reviewed-by: Kevin Tian Reviewed-by: Reinette Chatre Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-4-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/virqfd.c | 21 +++++++++++++++++++++ include/linux/vfio.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index a928c68df4763..e06b32ddedced 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -104,6 +104,13 @@ static void virqfd_inject(struct work_struct *work) virqfd->thread(virqfd->opaque, virqfd->data); } +static void virqfd_flush_inject(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); + + flush_work(&virqfd->inject); +} + int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), @@ -127,6 +134,7 @@ int vfio_virqfd_enable(void *opaque, INIT_WORK(&virqfd->shutdown, virqfd_shutdown); INIT_WORK(&virqfd->inject, virqfd_inject); + INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); irqfd = fdget(fd); if (!irqfd.file) { @@ -217,6 +225,19 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) +{ + unsigned long flags; + + spin_lock_irqsave(&virqfd_lock, flags); + if (*pvirqfd && (*pvirqfd)->thread) + queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); + spin_unlock_irqrestore(&virqfd_lock, flags); + + flush_workqueue(vfio_irqfd_cleanup_wq); +} +EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); + module_init(vfio_virqfd_init); module_exit(vfio_virqfd_exit); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index fdd393f70b198..5e7bf143cb223 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -268,6 +268,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -275,5 +276,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ -- GitLab From 7d29d4c72c1e196cce6969c98072a272d1a703b3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:52 -0600 Subject: [PATCH 1386/2290] vfio/pci: Create persistent INTx handler [ Upstream commit 18c198c96a815c962adc2b9b77909eec0be7df4d ] A vulnerability exists where the eventfd for INTx signaling can be deconfigured, which unregisters the IRQ handler but still allows eventfds to be signaled with a NULL context through the SET_IRQS ioctl or through unmask irqfd if the device interrupt is pending. Ideally this could be solved with some additional locking; the igate mutex serializes the ioctl and config space accesses, and the interrupt handler is unregistered relative to the trigger, but the irqfd path runs asynchronous to those. The igate mutex cannot be acquired from the atomic context of the eventfd wake function. Disabling the irqfd relative to the eventfd registration is potentially incompatible with existing userspace. As a result, the solution implemented here moves configuration of the INTx interrupt handler to track the lifetime of the INTx context object and irq_type configuration, rather than registration of a particular trigger eventfd. Synchronization is added between the ioctl path and eventfd_signal() wrapper such that the eventfd trigger can be dynamically updated relative to in-flight interrupts or irqfd callbacks. Cc: Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reported-by: Reinette Chatre Reviewed-by: Kevin Tian Reviewed-by: Reinette Chatre Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_intrs.c | 149 ++++++++++++++++-------------- 1 file changed, 82 insertions(+), 67 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 4f0699215f12b..03246a59b5536 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -55,8 +55,13 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) { struct vfio_pci_core_device *vdev = opaque; - if (likely(is_intx(vdev) && !vdev->virq_disabled)) - eventfd_signal(vdev->ctx[0].trigger, 1); + if (likely(is_intx(vdev) && !vdev->virq_disabled)) { + struct eventfd_ctx *trigger; + + trigger = READ_ONCE(vdev->ctx[0].trigger); + if (likely(trigger)) + eventfd_signal(trigger, 1); + } } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */ @@ -191,98 +196,104 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) return ret; } -static int vfio_intx_enable(struct vfio_pci_core_device *vdev) +static int vfio_intx_enable(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { + struct pci_dev *pdev = vdev->pdev; + unsigned long irqflags; + char *name; + int ret; + if (!is_irq_none(vdev)) return -EINVAL; - if (!vdev->pdev->irq) + if (!pdev->irq) return -ENODEV; + name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); + if (!name) + return -ENOMEM; + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; vdev->num_ctx = 1; + vdev->ctx[0].name = name; + vdev->ctx[0].trigger = trigger; + /* - * If the virtual interrupt is masked, restore it. Devices - * supporting DisINTx can be masked at the hardware level - * here, non-PCI-2.3 devices will have to wait until the - * interrupt is enabled. + * Fill the initial masked state based on virq_disabled. After + * enable, changing the DisINTx bit in vconfig directly changes INTx + * masking. igate prevents races during setup, once running masked + * is protected via irqlock. + * + * Devices supporting DisINTx also reflect the current mask state in + * the physical DisINTx bit, which is not affected during IRQ setup. + * + * Devices without DisINTx support require an exclusive interrupt. + * IRQ masking is performed at the IRQ chip. Again, igate protects + * against races during setup and IRQ handlers and irqfds are not + * yet active, therefore masked is stable and can be used to + * conditionally auto-enable the IRQ. + * + * irq_type must be stable while the IRQ handler is registered, + * therefore it must be set before request_irq(). */ vdev->ctx[0].masked = vdev->virq_disabled; - if (vdev->pci_2_3) - pci_intx(vdev->pdev, !vdev->ctx[0].masked); + if (vdev->pci_2_3) { + pci_intx(pdev, !vdev->ctx[0].masked); + irqflags = IRQF_SHARED; + } else { + irqflags = vdev->ctx[0].masked ? IRQF_NO_AUTOEN : 0; + } vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; + ret = request_irq(pdev->irq, vfio_intx_handler, + irqflags, vdev->ctx[0].name, vdev); + if (ret) { + vdev->irq_type = VFIO_PCI_NUM_IRQS; + kfree(name); + vdev->num_ctx = 0; + kfree(vdev->ctx); + return ret; + } + return 0; } -static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) +static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { struct pci_dev *pdev = vdev->pdev; - unsigned long irqflags = IRQF_SHARED; - struct eventfd_ctx *trigger; - unsigned long flags; - int ret; + struct eventfd_ctx *old; - if (vdev->ctx[0].trigger) { - free_irq(pdev->irq, vdev); - kfree(vdev->ctx[0].name); - eventfd_ctx_put(vdev->ctx[0].trigger); - vdev->ctx[0].trigger = NULL; - } - - if (fd < 0) /* Disable only */ - return 0; + old = vdev->ctx[0].trigger; - vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", - pci_name(pdev)); - if (!vdev->ctx[0].name) - return -ENOMEM; + WRITE_ONCE(vdev->ctx[0].trigger, trigger); - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(vdev->ctx[0].name); - return PTR_ERR(trigger); - } - - vdev->ctx[0].trigger = trigger; - - /* - * Devices without DisINTx support require an exclusive interrupt, - * IRQ masking is performed at the IRQ chip. The masked status is - * protected by vdev->irqlock. Setup the IRQ without auto-enable and - * unmask as necessary below under lock. DisINTx is unmodified by - * the IRQ configuration and may therefore use auto-enable. - */ - if (!vdev->pci_2_3) - irqflags = IRQF_NO_AUTOEN; - - ret = request_irq(pdev->irq, vfio_intx_handler, - irqflags, vdev->ctx[0].name, vdev); - if (ret) { - vdev->ctx[0].trigger = NULL; - kfree(vdev->ctx[0].name); - eventfd_ctx_put(trigger); - return ret; + /* Releasing an old ctx requires synchronizing in-flight users */ + if (old) { + synchronize_irq(pdev->irq); + vfio_virqfd_flush_thread(&vdev->ctx[0].unmask); + eventfd_ctx_put(old); } - spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && !vdev->ctx[0].masked) - enable_irq(pdev->irq); - spin_unlock_irqrestore(&vdev->irqlock, flags); - return 0; } static void vfio_intx_disable(struct vfio_pci_core_device *vdev) { + struct pci_dev *pdev = vdev->pdev; + vfio_virqfd_disable(&vdev->ctx[0].unmask); vfio_virqfd_disable(&vdev->ctx[0].mask); - vfio_intx_set_signal(vdev, -1); + free_irq(pdev->irq, vdev); + if (vdev->ctx[0].trigger) + eventfd_ctx_put(vdev->ctx[0].trigger); + kfree(vdev->ctx[0].name); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -534,19 +545,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + struct eventfd_ctx *trigger = NULL; int32_t fd = *(int32_t *)data; int ret; - if (is_intx(vdev)) - return vfio_intx_set_signal(vdev, fd); + if (fd >= 0) { + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) + return PTR_ERR(trigger); + } - ret = vfio_intx_enable(vdev); - if (ret) - return ret; + if (is_intx(vdev)) + ret = vfio_intx_set_signal(vdev, trigger); + else + ret = vfio_intx_enable(vdev, trigger); - ret = vfio_intx_set_signal(vdev, fd); - if (ret) - vfio_intx_disable(vdev); + if (ret && trigger) + eventfd_ctx_put(trigger); return ret; } -- GitLab From 7932db06c82c5b2f42a4d1a849d97dba9ce4a362 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:53 -0600 Subject: [PATCH 1387/2290] vfio/platform: Create persistent IRQ handlers [ Upstream commit 675daf435e9f8e5a5eab140a9864dfad6668b375 ] The vfio-platform SET_IRQS ioctl currently allows loopback triggering of an interrupt before a signaling eventfd has been configured by the user, which thereby allows a NULL pointer dereference. Rather than register the IRQ relative to a valid trigger, register all IRQs in a disabled state in the device open path. This allows mask operations on the IRQ to nest within the overall enable state governed by a valid eventfd signal. This decouples @masked, protected by the @locked spinlock from @trigger, protected via the @igate mutex. In doing so, it's guaranteed that changes to @trigger cannot race the IRQ handlers because the IRQ handler is synchronously disabled before modifying the trigger, and loopback triggering of the IRQ via ioctl is safe due to serialization with trigger changes via igate. For compatibility, request_irq() failures are maintained to be local to the SET_IRQS ioctl rather than a fatal error in the open device path. This allows, for example, a userspace driver with polling mode support to continue to work regardless of moving the request_irq() call site. This necessarily blocks all SET_IRQS access to the failed index. Cc: Eric Auger Cc: Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-7-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/platform/vfio_platform_irq.c | 101 +++++++++++++++------- 1 file changed, 68 insertions(+), 33 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index f2893f2fcaabd..7f4341a8d7185 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, return 0; } +/* + * The trigger eventfd is guaranteed valid in the interrupt path + * and protected by the igate mutex when triggered via ioctl. + */ +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) +{ + if (likely(irq_ctx->trigger)) + eventfd_signal(irq_ctx->trigger, 1); +} + static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) spin_unlock_irqrestore(&irq_ctx->lock, flags); if (ret == IRQ_HANDLED) - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return ret; } @@ -164,22 +174,19 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return IRQ_HANDLED; } static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, - int fd, irq_handler_t handler) + int fd) { struct vfio_platform_irq *irq = &vdev->irqs[index]; struct eventfd_ctx *trigger; - int ret; if (irq->trigger) { - irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); - free_irq(irq->hwirq, irq); - kfree(irq->name); + disable_irq(irq->hwirq); eventfd_ctx_put(irq->trigger); irq->trigger = NULL; } @@ -187,30 +194,20 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, if (fd < 0) /* Disable only */ return 0; - irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", - irq->hwirq, vdev->name); - if (!irq->name) - return -ENOMEM; - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(irq->name); + if (IS_ERR(trigger)) return PTR_ERR(trigger); - } irq->trigger = trigger; - irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); - ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); - if (ret) { - kfree(irq->name); - eventfd_ctx_put(trigger); - irq->trigger = NULL; - return ret; - } - - if (!irq->masked) - enable_irq(irq->hwirq); + /* + * irq->masked effectively provides nested disables within the overall + * enable relative to trigger. Specifically request_irq() is called + * with NO_AUTOEN, therefore the IRQ is initially disabled. The user + * may only further disable the IRQ with a MASK operations because + * irq->masked is initially false. + */ + enable_irq(irq->hwirq); return 0; } @@ -229,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, handler = vfio_irq_handler; if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) - return vfio_set_trigger(vdev, index, -1, handler); + return vfio_set_trigger(vdev, index, -1); if (start != 0 || count != 1) return -EINVAL; @@ -237,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; - return vfio_set_trigger(vdev, index, fd, handler); + return vfio_set_trigger(vdev, index, fd); } if (flags & VFIO_IRQ_SET_DATA_NONE) { @@ -261,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, unsigned start, unsigned count, uint32_t flags, void *data) = NULL; + /* + * For compatibility, errors from request_irq() are local to the + * SET_IRQS path and reflected in the name pointer. This allows, + * for example, polling mode fallback for an exclusive IRQ failure. + */ + if (IS_ERR(vdev->irqs[index].name)) + return PTR_ERR(vdev->irqs[index].name); + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_MASK: func = vfio_platform_set_irq_mask; @@ -281,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, int vfio_platform_irq_init(struct vfio_platform_device *vdev) { - int cnt = 0, i; + int cnt = 0, i, ret = 0; while (vdev->get_irq(vdev, cnt) >= 0) cnt++; @@ -292,29 +297,54 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) for (i = 0; i < cnt; i++) { int hwirq = vdev->get_irq(vdev, i); + irq_handler_t handler = vfio_irq_handler; - if (hwirq < 0) + if (hwirq < 0) { + ret = -EINVAL; goto err; + } spin_lock_init(&vdev->irqs[i].lock); vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; - if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED; + handler = vfio_automasked_irq_handler; + } vdev->irqs[i].count = 1; vdev->irqs[i].hwirq = hwirq; vdev->irqs[i].masked = false; + vdev->irqs[i].name = kasprintf(GFP_KERNEL, + "vfio-irq[%d](%s)", hwirq, + vdev->name); + if (!vdev->irqs[i].name) { + ret = -ENOMEM; + goto err; + } + + ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, + vdev->irqs[i].name, &vdev->irqs[i]); + if (ret) { + kfree(vdev->irqs[i].name); + vdev->irqs[i].name = ERR_PTR(ret); + } } vdev->num_irqs = cnt; return 0; err: + for (--i; i >= 0; i--) { + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + kfree(vdev->irqs[i].name); + } + } kfree(vdev->irqs); - return -EINVAL; + return ret; } void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) @@ -324,7 +354,12 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) for (i = 0; i < vdev->num_irqs; i++) { vfio_virqfd_disable(&vdev->irqs[i].mask); vfio_virqfd_disable(&vdev->irqs[i].unmask); - vfio_set_trigger(vdev, i, -1, NULL); + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + if (vdev->irqs[i].trigger) + eventfd_ctx_put(vdev->irqs[i].trigger); + kfree(vdev->irqs[i].name); + } } vdev->num_irqs = 0; -- GitLab From 083e750c9f5f4c3bf61161330fb84d7c8e8bb417 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Mar 2024 15:38:54 -0600 Subject: [PATCH 1388/2290] vfio/fsl-mc: Block calling interrupt handler without trigger [ Upstream commit 7447d911af699a15f8d050dfcb7c680a86f87012 ] The eventfd_ctx trigger pointer of the vfio_fsl_mc_irq object is initially NULL and may become NULL if the user sets the trigger eventfd to -1. The interrupt handler itself is guaranteed that trigger is always valid between request_irq() and free_irq(), but the loopback testing mechanisms to invoke the handler function need to test the trigger. The triggering and setting ioctl paths both make use of igate and are therefore mutually exclusive. The vfio-fsl-mc driver does not make use of irqfds, nor does it support any sort of masking operations, therefore unlike vfio-pci and vfio-platform, the flow can remain essentially unchanged. Cc: Diana Craciun Cc: Fixes: cc0ee20bd969 ("vfio/fsl-mc: trigger an interrupt via eventfd") Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-8-alex.williamson@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 7b428eac3d3e5..b125b6edf634e 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -142,13 +142,14 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, irq = &vdev->mc_irqs[index]; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_fsl_mc_irq_handler(hwirq, irq); + if (irq->trigger) + eventfd_signal(irq->trigger, 1); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { u8 trigger = *(u8 *)data; - if (trigger) - vfio_fsl_mc_irq_handler(hwirq, irq); + if (trigger && irq->trigger) + eventfd_signal(irq->trigger, 1); } return 0; -- GitLab From 452a382970640b27c8f0b97347eaf4c2902cf572 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 18 Mar 2023 12:56:33 +0100 Subject: [PATCH 1389/2290] x86/coco: Export cc_vendor commit 3d91c537296794d5d0773f61abbe7b63f2f132d8 upstream. It will be used in different checks in future changes. Export it directly and provide accessor functions and stubs so this can be used in general code when CONFIG_ARCH_HAS_CC_PLATFORM is not set. No functional changes. [ tglx: Add accessor functions ] Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230318115634.9392-2-bp@alien8.de Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 13 ++++--------- arch/x86/include/asm/coco.h | 23 ++++++++++++++++++++--- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 49b44f8814846..684f0a910475c 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,7 +13,7 @@ #include #include -static enum cc_vendor vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init; static u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) @@ -83,7 +83,7 @@ static bool hyperv_cc_platform_has(enum cc_attr attr) bool cc_platform_has(enum cc_attr attr) { - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return amd_cc_platform_has(attr); case CC_VENDOR_INTEL: @@ -105,7 +105,7 @@ u64 cc_mkenc(u64 val) * - for AMD, bit *set* means the page is encrypted * - for Intel *clear* means encrypted. */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return val | cc_mask; case CC_VENDOR_INTEL: @@ -118,7 +118,7 @@ u64 cc_mkenc(u64 val) u64 cc_mkdec(u64 val) { /* See comment in cc_mkenc() */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return val & ~cc_mask; case CC_VENDOR_INTEL: @@ -129,11 +129,6 @@ u64 cc_mkdec(u64 val) } EXPORT_SYMBOL_GPL(cc_mkdec); -__init void cc_set_vendor(enum cc_vendor v) -{ - vendor = v; -} - __init void cc_set_mask(u64 mask) { cc_mask = mask; diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a60d34f..91b9448ffe768 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -11,13 +11,30 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -void cc_set_vendor(enum cc_vendor v); -void cc_set_mask(u64 mask); - #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern enum cc_vendor cc_vendor; + +static inline enum cc_vendor cc_get_vendor(void) +{ + return cc_vendor; +} + +static inline void cc_set_vendor(enum cc_vendor vendor) +{ + cc_vendor = vendor; +} + +void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else +static inline enum cc_vendor cc_get_vendor(void) +{ + return CC_VENDOR_NONE; +} + +static inline void cc_set_vendor(enum cc_vendor vendor) { } + static inline u64 cc_mkenc(u64 val) { return val; -- GitLab From 851ddc3587379975c6d5da7ef0ac39b70bd063e5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 8 May 2023 12:44:28 +0200 Subject: [PATCH 1390/2290] x86/coco: Get rid of accessor functions commit da86eb9611840772a459693832e54c63cbcc040a upstream. cc_vendor is __ro_after_init and thus can be used directly. No functional changes. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230508121957.32341-1-bp@alien8.de Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 2 +- arch/x86/coco/tdx/tdx.c | 2 +- arch/x86/include/asm/coco.h | 19 +------------------ arch/x86/kernel/cpu/mshyperv.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 2 +- 5 files changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 684f0a910475c..1e73254336a62 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,7 +13,7 @@ #include #include -enum cc_vendor cc_vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; static u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index d0565a9e7d8c9..4692450aeb4d3 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -793,7 +793,7 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); - cc_set_vendor(CC_VENDOR_INTEL); + cc_vendor = CC_VENDOR_INTEL; tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 91b9448ffe768..75a0d7b1a906a 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -11,30 +11,13 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -#ifdef CONFIG_ARCH_HAS_CC_PLATFORM extern enum cc_vendor cc_vendor; -static inline enum cc_vendor cc_get_vendor(void) -{ - return cc_vendor; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) -{ - cc_vendor = vendor; -} - +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else -static inline enum cc_vendor cc_get_vendor(void) -{ - return CC_VENDOR_NONE; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) { } - static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 34d9e899e471e..9b039e9635e40 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -344,7 +344,7 @@ static void __init ms_hyperv_init_platform(void) /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); + cc_vendor = CC_VENDOR_HYPERV; } } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index a11a6ebbf5ecf..4daeefa011ed3 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -608,7 +608,7 @@ void __init sme_enable(struct boot_params *bp) out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; - cc_set_vendor(CC_VENDOR_AMD); + cc_vendor = CC_VENDOR_AMD; cc_set_mask(sme_me_mask); } } -- GitLab From 77e3de4113958c06a1f6696adefc9baf0b6a2659 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 2 Feb 2024 17:29:32 +0100 Subject: [PATCH 1391/2290] x86/Kconfig: Remove CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT commit 29956748339aa8757a7e2f927a8679dd08f24bb6 upstream. It was meant well at the time but nothing's using it so get rid of it. Signed-off-by: Borislav Petkov (AMD) Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240202163510.GDZb0Zvj8qOndvFOiZ@fat_crate.local Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 4 +--- Documentation/x86/amd-memory-encryption.rst | 16 ++++++++-------- arch/x86/Kconfig | 13 ------------- arch/x86/mm/mem_encrypt_identity.c | 11 +---------- 4 files changed, 10 insertions(+), 34 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f7dfdd3d021ea..88dffaf8f0a99 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3206,9 +3206,7 @@ mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control Valid arguments: on, off - Default (depends on kernel configuration option): - on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) - off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n) + Default: off mem_encrypt=on: Activate SME mem_encrypt=off: Do not activate SME diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst index 934310ce72582..bace87cc9ca2c 100644 --- a/Documentation/x86/amd-memory-encryption.rst +++ b/Documentation/x86/amd-memory-encryption.rst @@ -87,14 +87,14 @@ The state of SME in the Linux kernel can be documented as follows: kernel is non-zero). SME can also be enabled and activated in the BIOS. If SME is enabled and -activated in the BIOS, then all memory accesses will be encrypted and it will -not be necessary to activate the Linux memory encryption support. If the BIOS -merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate -memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or -by supplying mem_encrypt=on on the kernel command line. However, if BIOS does -not enable SME, then Linux will not be able to activate memory encryption, even -if configured to do so by default or the mem_encrypt=on command line parameter -is specified. +activated in the BIOS, then all memory accesses will be encrypted and it +will not be necessary to activate the Linux memory encryption support. + +If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), +then memory encryption can be enabled by supplying mem_encrypt=on on the +kernel command line. However, if BIOS does not enable SME, then Linux +will not be able to activate memory encryption, even if configured to do +so by default or the mem_encrypt=on command line parameter is specified. Secure Nested Paging (SNP) ========================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5caa023e98397..bea53385d31e3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1553,19 +1553,6 @@ config AMD_MEM_ENCRYPT This requires an AMD processor that supports Secure Memory Encryption (SME). -config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT - bool "Activate AMD Secure Memory Encryption (SME) by default" - depends on AMD_MEM_ENCRYPT - help - Say yes to have system memory encrypted by default if running on - an AMD processor that supports Secure Memory Encryption (SME). - - If set to Y, then the encryption of system memory can be - deactivated with the mem_encrypt=off command line option. - - If set to N, then the encryption of system memory can be - activated with the mem_encrypt=on command line option. - # Common NUMA Features config NUMA bool "NUMA Memory Allocation and Scheduler Support" diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 4daeefa011ed3..7d96904230af0 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; -static char sme_cmdline_off[] __initdata = "off"; static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { @@ -504,7 +503,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) void __init sme_enable(struct boot_params *bp) { - const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; + const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; unsigned long me_mask; @@ -587,12 +586,6 @@ void __init sme_enable(struct boot_params *bp) asm ("lea sme_cmdline_on(%%rip), %0" : "=r" (cmdline_on) : "p" (sme_cmdline_on)); - asm ("lea sme_cmdline_off(%%rip), %0" - : "=r" (cmdline_off) - : "p" (sme_cmdline_off)); - - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) - sme_me_mask = me_mask; cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); @@ -602,8 +595,6 @@ void __init sme_enable(struct boot_params *bp) if (!strncmp(buffer, cmdline_on, sizeof(buffer))) sme_me_mask = me_mask; - else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) - sme_me_mask = 0; out: if (sme_me_mask) { -- GitLab From fe272b61506bb1534922ef07aa165fd3c37a6a90 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 3 Feb 2024 13:53:06 +0100 Subject: [PATCH 1392/2290] x86/sev: Fix position dependent variable references in startup code commit 1c811d403afd73f04bde82b83b24c754011bd0e8 upstream. The early startup code executes from a 1:1 mapping of memory, which differs from the mapping that the code was linked and/or relocated to run at. The latter mapping is not active yet at this point, and so symbol references that rely on it will fault. Given that the core kernel is built without -fPIC, symbol references are typically emitted as absolute, and so any such references occuring in the early startup code will therefore crash the kernel. While an attempt was made to work around this for the early SEV/SME startup code, by forcing RIP-relative addressing for certain global SEV/SME variables via inline assembly (see snp_cpuid_get_table() for example), RIP-relative addressing must be pervasively enforced for SEV/SME global variables when accessed prior to page table fixups. __startup_64() already handles this issue for select non-SEV/SME global variables using fixup_pointer(), which adjusts the pointer relative to a `physaddr` argument. To avoid having to pass around this `physaddr` argument across all functions needing to apply pointer fixups, introduce a macro RIP_RELATIVE_REF() which generates a RIP-relative reference to a given global variable. It is used where necessary to force RIP-relative accesses to global variables. For backporting purposes, this patch makes no attempt at cleaning up other occurrences of this pattern, involving either inline asm or fixup_pointer(). Those will be addressed later. [ bp: Call it "rip_rel_ref" everywhere like other code shortens "rIP-relative reference" and make the asm wrapper __always_inline. ] Co-developed-by: Kevin Loughlin Signed-off-by: Kevin Loughlin Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/all/20240130220845.1978329-1-kevinloughlin@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 7 +------ arch/x86/include/asm/asm.h | 14 ++++++++++++++ arch/x86/include/asm/coco.h | 8 +++++++- arch/x86/include/asm/mem_encrypt.h | 15 +++++++++------ arch/x86/kernel/sev-shared.c | 12 ++++++------ arch/x86/kernel/sev.c | 4 ++-- arch/x86/mm/mem_encrypt_identity.c | 27 ++++++++++++--------------- 7 files changed, 51 insertions(+), 36 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 1e73254336a62..1d3ad275c3664 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -14,7 +14,7 @@ #include enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; -static u64 cc_mask __ro_after_init; +u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) { @@ -128,8 +128,3 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); - -__init void cc_set_mask(u64 mask) -{ - cc_mask = mask; -} diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index fbcfec4dc4ccd..ca8eed1d496ab 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -113,6 +113,20 @@ #endif +#ifndef __ASSEMBLY__ +#ifndef __pic__ +static __always_inline __pure void *rip_rel_ptr(void *p) +{ + asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p)); + + return p; +} +#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var))) +#else +#define RIP_REL_REF(var) (var) +#endif +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 75a0d7b1a906a..60bb26097da1a 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_COCO_H #define _ASM_X86_COCO_H +#include #include enum cc_vendor { @@ -12,9 +13,14 @@ enum cc_vendor { }; extern enum cc_vendor cc_vendor; +extern u64 cc_mask; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM -void cc_set_mask(u64 mask); +static inline void cc_set_mask(u64 mask) +{ + RIP_REL_REF(cc_mask) = mask; +} + u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c91326593e741..41d06822bc8cd 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -15,7 +15,8 @@ #include #include -#include +#include +struct boot_params; #ifdef CONFIG_X86_MEM_ENCRYPT void __init mem_encrypt_init(void); @@ -57,6 +58,11 @@ void __init mem_encrypt_free_decrypted_mem(void); void __init sev_es_init_vc_handling(void); +static inline u64 sme_get_me_mask(void) +{ + return RIP_REL_REF(sme_me_mask); +} + #define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -88,6 +94,8 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool en static inline void mem_encrypt_free_decrypted_mem(void) { } +static inline u64 sme_get_me_mask(void) { return 0; } + #define __bss_decrypted #endif /* CONFIG_AMD_MEM_ENCRYPT */ @@ -105,11 +113,6 @@ void add_encrypt_protection_map(void); extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -static inline u64 sme_get_me_mask(void) -{ - return sme_me_mask; -} - #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 71d8698702ce3..271e70d5748ef 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -553,9 +553,9 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; /* Skip post-processing for out-of-range zero leafs. */ - if (!(leaf->fn <= cpuid_std_range_max || - (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || - (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) + if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) || + (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) || + (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max)))) return 0; } @@ -1060,10 +1060,10 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; if (fn->eax_in == 0x0) - cpuid_std_range_max = fn->eax; + RIP_REL_REF(cpuid_std_range_max) = fn->eax; else if (fn->eax_in == 0x40000000) - cpuid_hyp_range_max = fn->eax; + RIP_REL_REF(cpuid_hyp_range_max) = fn->eax; else if (fn->eax_in == 0x80000000) - cpuid_ext_range_max = fn->eax; + RIP_REL_REF(cpuid_ext_range_max) = fn->eax; } } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c8dfb0fdde7f9..f93ff4794e38f 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -736,7 +736,7 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd * This eliminates worries about jump tables or checking boot_cpu_data * in the cc_platform_has() function. */ - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) return; /* @@ -758,7 +758,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr * This eliminates worries about jump tables or checking boot_cpu_data * in the cc_platform_has() function. */ - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) return; /* Invalidate the memory pages before they are marked shared in the RMP table. */ diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 7d96904230af0..06ccbd36e8dcd 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -304,7 +304,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * instrumentation or checking boot_cpu_data in the cc_platform_has() * function. */ - if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED) + if (!sme_get_me_mask() || + RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED) return; /* @@ -541,11 +542,11 @@ void __init sme_enable(struct boot_params *bp) me_mask = 1UL << (ebx & 0x3f); /* Check the SEV MSR whether SEV or SME is enabled */ - sev_status = __rdmsr(MSR_AMD64_SEV); - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); + feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */ - if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED)) snp_abort(); /* Check if memory encryption is enabled */ @@ -571,7 +572,6 @@ void __init sme_enable(struct boot_params *bp) return; } else { /* SEV state cannot be controlled by a command line option */ - sme_me_mask = me_mask; goto out; } @@ -590,16 +590,13 @@ void __init sme_enable(struct boot_params *bp) cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); - if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) - goto out; - - if (!strncmp(buffer, cmdline_on, sizeof(buffer))) - sme_me_mask = me_mask; + if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || + strncmp(buffer, cmdline_on, sizeof(buffer))) + return; out: - if (sme_me_mask) { - physical_mask &= ~sme_me_mask; - cc_vendor = CC_VENDOR_AMD; - cc_set_mask(sme_me_mask); - } + RIP_REL_REF(sme_me_mask) = me_mask; + physical_mask &= ~me_mask; + cc_vendor = CC_VENDOR_AMD; + cc_set_mask(me_mask); } -- GitLab From 3372c3ffc34509cfd2f66761a1f568b78daed04f Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 6 Mar 2024 10:52:17 -0500 Subject: [PATCH 1393/2290] mm/migrate: set swap entry values of THP tail pages properly. The tail pages in a THP can have swap entry information stored in their private field. When migrating to a new page, all tail pages of the new page need to update ->private to avoid future data corruption. This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory: work on folio->swap instead of page->private when splitting folio"), subpages of a swapcached THP no longer requires the maintenance. Adding THPs to the swapcache was introduced in commit 38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"), where each subpage of a THP added to the swapcache had its own swapcache entry and required the ->private field to point to the correct swapcache entry. Later, when THP migration functionality was implemented in commit 616b8371539a6 ("mm: thp: enable thp migration in generic path"), it initially did not handle the subpages of swapcached THPs, failing to update their ->private fields or replace the subpage pointers in the swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration for shmem thp") addressed the swapcache update aspect. This patch fixes the update of subpage ->private fields. Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_charante@quicinc.com/ Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path") Signed-off-by: Zi Yan Acked-by: David Hildenbrand Reported-and-tested-by: Charan Teja Kalla Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index c93dd6a31c31a..c5968021fde0a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -423,8 +423,12 @@ int folio_migrate_mapping(struct address_space *mapping, if (folio_test_swapbacked(folio)) { __folio_set_swapbacked(newfolio); if (folio_test_swapcache(folio)) { + int i; + folio_set_swapcache(newfolio); - newfolio->private = folio_get_private(folio); + for (i = 0; i < nr; i++) + set_page_private(folio_page(newfolio, i), + page_private(folio_page(folio, i))); } entries = nr; } else { -- GitLab From bf2ba0b241e84e1321cefb02b8a68a0a078dcae5 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Sun, 17 Mar 2024 15:15:22 -0700 Subject: [PATCH 1394/2290] init: open /initrd.image with O_LARGEFILE commit 4624b346cf67400ef46a31771011fb798dd2f999 upstream. If initrd data is larger than 2Gb, we'll eventually fail to write to the /initrd.image file when we hit that limit, unless O_LARGEFILE is set. Link: https://lkml.kernel.org/r/20240317221522.896040-1-jsperbeck@google.com Signed-off-by: John Sperbeck Cc: Jens Axboe Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- init/initramfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/initramfs.c b/init/initramfs.c index 2f5bfb7d76521..7b915170789da 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -680,7 +680,7 @@ static void __init populate_initrd_image(char *err) printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; -- GitLab From ca8106fffc11cae54adbc3b2f7c77cbeee4a58a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 24 Mar 2024 17:10:53 +0100 Subject: [PATCH 1395/2290] x86/efistub: Add missing boot_params for mixed mode compat entry commit d21f5a59ea773826cc489acb287811d690b703cc upstream. The pure EFI stub entry point does not take a struct boot_params from the boot loader, but creates it from scratch, and populates only the fields that still have meaning in this context (command line, initrd base and size, etc) The original mixed mode implementation used the EFI handover protocol instead, where the boot loader (i.e., GRUB) populates a boot_params struct and passes it to a special Linux specific EFI entry point that takes the boot_params pointer as its third argument. When the new mixed mode implementation was introduced, using a special 32-bit PE entrypoint in the 64-bit kernel, it adopted the pure approach, and relied on the EFI stub to create the struct boot_params. This is preferred because it makes the bootloader side much easier to implement, as it does not need any x86-specific knowledge on how struct boot_params and struct setup_header are put together. This mixed mode implementation was adopted by systemd-boot version 252 and later. When commit e2ab9eab324c ("x86/boot/compressed: Move 32-bit entrypoint code into .text section") refactored this code and moved it out of head_64.S, the fact that ESI was populated with the address of the base of the image was overlooked, and to simplify the code flow, ESI is now zeroed and stored to memory unconditionally in shared code, so that the NULL-ness of that variable can still be used later to determine which mixed mode boot protocol is in use. With ESI pointing to the base of the image, it can serve as a struct boot_params pointer for startup_32(), which only accesses the init_data and kernel_alignment fields (and the scratch field as a temporary stack). Zeroing ESI means that those accesses produce garbage now, even though things appear to work if the first page of memory happens to be zeroed, and the region right before LOAD_PHYSICAL_ADDR (== 16 MiB) happens to be free. The solution is to pass a special, temporary struct boot_params to startup_32() via ESI, one that is sufficient for getting it to create the page tables correctly and is discarded right after. This involves setting a minimal alignment of 4k, only to get the statically allocated page tables line up correctly, and setting init_size to the executable image size (_end - startup_32). This ensures that the page tables are covered by the static footprint of the PE image. Given that EFI boot no longer calls the decompressor and no longer pads the image to permit the decompressor to execute in place, the same temporary struct boot_params should be used in the EFI handover protocol based mixed mode implementation as well, to prevent the page tables from being placed outside of allocated memory. Fixes: e2ab9eab324c ("x86/boot/compressed: Move 32-bit entrypoint code into .text section") Cc: # v6.1+ Closes: https://lore.kernel.org/all/20240321150510.GI8211@craftyguy.net/ Reported-by: Clayton Craft Tested-by: Clayton Craft Tested-by: Hans de Goede Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/efi_mixed.S | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 07873f269b7bd..fb6d60dcd6ed1 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include +#include #include #include #include #include +#include .code64 .text @@ -155,6 +157,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -169,6 +172,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -245,8 +249,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -272,9 +274,18 @@ SYM_FUNC_START_LOCAL(efi32_entry) leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -300,8 +311,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers -- GitLab From 6b226ae43d82a43819d60fea0d66772ed1b88e77 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 26 Mar 2024 11:15:25 +0100 Subject: [PATCH 1396/2290] efi/libstub: Cast away type warning in use of max() commit 61d130f261a3c15ae2c4b6f3ac3517d5d5b78855 upstream. Avoid a type mismatch warning in max() by switching to max_t() and providing the type explicitly. Fixes: 3cb4a4827596abc82e ("efi/libstub: fix efi_random_alloc() ...") Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/randomalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 0d7b11b55ff31..fff826f56728c 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -119,7 +119,7 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; + target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, -- GitLab From a321a9907c0e02d8314f6b3060bcaa177edc105c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 21 Feb 2024 07:35:52 -0800 Subject: [PATCH 1397/2290] btrfs: zoned: don't skip block groups with 100% zone unusable commit a8b70c7f8600bc77d03c0b032c0662259b9e615e upstream. Commit f4a9f219411f ("btrfs: do not delete unused block group if it may be used soon") changed the behaviour of deleting unused block-groups on zoned filesystems. Starting with this commit, we're using btrfs_space_info_used() to calculate the number of used bytes in a space_info. But btrfs_space_info_used() also accounts btrfs_space_info::bytes_zone_unusable as used bytes. So if a block group is 100% zone_unusable it is skipped from the deletion step. In order not to skip fully zone_unusable block-groups, also check if the block-group has bytes left that can be used on a zoned filesystem. Fixes: f4a9f219411f ("btrfs: do not delete unused block group if it may be used soon") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9f77565bd7f5a..5993b627be580 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1413,7 +1413,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used) { + if (space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the -- GitLab From d7387bcb7781f5ede130fec511d4ef5c415c34d4 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 26 Feb 2024 16:39:13 +0100 Subject: [PATCH 1398/2290] btrfs: zoned: use zone aware sb location for scrub commit 74098a989b9c3370f768140b7783a7aaec2759b3 upstream. At the moment scrub_supers() doesn't grab the super block's location via the zoned device aware btrfs_sb_log_location() but via btrfs_sb_offset(). This leads to checksum errors on 'scrub' as we're not accessing the correct location of the super block. So use btrfs_sb_log_location() for getting the super blocks location on scrub. Reported-by: WA AM Link: http://lore.kernel.org/linux-btrfs/CANU2Z0EvUzfYxczLgGUiREoMndE9WdQnbaawV5Fv5gNXptPUKw@mail.gmail.com CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/scrub.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1672d4846baaf..12a2b1e3f1e35 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4177,7 +4177,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, gen = fs_info->last_trans_committed; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); + ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); + if (ret == -ENOENT) + break; + + if (ret) { + spin_lock(&sctx->stat_lock); + sctx->stat.super_errors++; + spin_unlock(&sctx->stat_lock); + continue; + } + if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; -- GitLab From 7eeabcea79b67cc29563e6a9a5c81f9e2c664d5b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 16 Mar 2024 08:43:36 +0100 Subject: [PATCH 1399/2290] wifi: mac80211: check/clear fast rx for non-4addr sta VLAN changes commit 4f2bdb3c5e3189297e156b3ff84b140423d64685 upstream. When moving a station out of a VLAN and deleting the VLAN afterwards, the fast_rx entry still holds a pointer to the VLAN's netdev, which can cause use-after-free bugs. Fix this by immediately calling ieee80211_check_fast_rx after the VLAN change. Cc: stable@vger.kernel.org Reported-by: ranygh@riseup.net Signed-off-by: Felix Fietkau Link: https://msgid.link/20240316074336.40442-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6cf0b77839d1d..1e57027da2913 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2075,15 +2075,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { - ieee80211_clear_fast_rx(sta); + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; + ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { -- GitLab From 7aa70c492272aa003cb52288e75bc255ec182157 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2024 10:10:20 +0200 Subject: [PATCH 1400/2290] wifi: iwlwifi: fw: don't always use FW dump trig commit 045a5b645dd59929b0e05375f493cde3a0318271 upstream. Since the dump_data (struct iwl_fwrt_dump_data) is a union, it's not safe to unconditionally access and use the 'trig' member, it might be 'desc' instead. Access it only if it's known to be 'trig' rather than 'desc', i.e. if ini-debug is present. Cc: stable@vger.kernel.org Fixes: 0eb50c674a1e ("iwlwifi: yoyo: send hcmd to fw after dump collection completes.") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240319100755.e2976bc58b29.I72fbd6135b3623227de53d8a2bb82776066cb72b@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 3ef0b776b7727..3b0ed1cdfa11e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2903,8 +2903,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) struct iwl_fw_dbg_params params = {0}; struct iwl_fwrt_dump_data *dump_data = &fwrt->dump.wks[wk_idx].dump_data; - u32 policy; - u32 time_point; if (!test_bit(wk_idx, &fwrt->dump.active_wks)) return; @@ -2935,13 +2933,16 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, false); - policy = le32_to_cpu(dump_data->trig->apply_policy); - time_point = le32_to_cpu(dump_data->trig->time_point); + if (iwl_trans_dbg_ini_valid(fwrt->trans)) { + u32 policy = le32_to_cpu(dump_data->trig->apply_policy); + u32 time_point = le32_to_cpu(dump_data->trig->time_point); - if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { - IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); - iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { + IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); + iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + } } + if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) iwl_force_nmi(fwrt->trans); -- GitLab From 4e79b4a64d087cb5723d0ee947e8996da9e82363 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 20 Mar 2024 11:26:07 -0700 Subject: [PATCH 1401/2290] exec: Fix NOMMU linux_binprm::exec in transfer_args_to_stack() commit 2aea94ac14d1e0a8ae9e34febebe208213ba72f7 upstream. In NOMMU kernel the value of linux_binprm::p is the offset inside the temporary program arguments array maintained in separate pages in the linux_binprm::page. linux_binprm::exec being a copy of linux_binprm::p thus must be adjusted when that array is copied to the user stack. Without that adjustment the value passed by the NOMMU kernel to the ELF program in the AT_EXECFN entry of the aux array doesn't make any sense and it may break programs that try to access memory pointed to by that entry. Adjust linux_binprm::exec before the successful return from the transfer_args_to_stack(). Cc: Fixes: b6a2fea39318 ("mm: variable length argument support") Fixes: 5edc2a5123a7 ("binfmt_elf_fdpic: wire up AT_EXECFD, AT_EXECFN, AT_SECURE") Signed-off-by: Max Filippov Link: https://lore.kernel.org/r/20240320182607.1472887-1-jcmvbkbc@gmail.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/exec.c b/fs/exec.c index 39f7751c90fc3..b01434d6a512d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -896,6 +896,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm, goto out; } + bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: -- GitLab From 192058bb3e1b4fc429689f8e2b57630f3beb7ae2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 19 Mar 2024 17:37:46 -0700 Subject: [PATCH 1402/2290] hexagon: vmlinux.lds.S: handle attributes section commit 549aa9678a0b3981d4821bf244579d9937650562 upstream. After the linked LLVM change, the build fails with CONFIG_LD_ORPHAN_WARN_LEVEL="error", which happens with allmodconfig: ld.lld: error: vmlinux.a(init/main.o):(.hexagon.attributes) is being placed in '.hexagon.attributes' Handle the attributes section in a similar manner as arm and riscv by adding it after the primary ELF_DETAILS grouping in vmlinux.lds.S, which fixes the error. Link: https://lkml.kernel.org/r/20240319-hexagon-handle-attributes-section-vmlinux-lds-s-v1-1-59855dab8872@kernel.org Fixes: 113616ec5b64 ("hexagon: select ARCH_WANT_LD_ORPHAN_WARN") Link: https://github.com/llvm/llvm-project/commit/31f4b329c8234fab9afa59494d7f8bdaeaefeaad Signed-off-by: Nathan Chancellor Reviewed-by: Brian Cain Cc: Bill Wendling Cc: Justin Stitt Cc: Nick Desaulniers Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 57465bff1fe49..df7f349c8d4f3 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -64,6 +64,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } -- GitLab From a433300285d7e3c816a82bf6506d2b6398503be6 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Sat, 16 Mar 2024 00:44:44 +0100 Subject: [PATCH 1403/2290] mmc: sdhci-omap: re-tuning is needed after a pm transition to support emmc HS200 mode commit f9e2a5b00a35f2c064dc679808bc8db5cc779ed6 upstream. "PM runtime functions" was been added in sdhci-omap driver in commit f433e8aac6b9 ("mmc: sdhci-omap: Implement PM runtime functions") along with "card power off and enable aggressive PM" in commit 3edf588e7fe0 ("mmc: sdhci-omap: Allow SDIO card power off and enable aggressive PM"). Since then, the sdhci-omap driver doesn't work using mmc-hs200 mode due to the tuning values being lost during a pm transition. As for the sdhci_am654 driver, request a new tuning sequence before suspend (sdhci_omap_runtime_suspend()), otherwise the device will trigger cache flush error: mmc1: cache flush error -110 (ETIMEDOUT) mmc1: error -110 doing aggressive suspend followed by I/O errors produced by fdisk -l /dev/mmcblk1boot1: I/O error, dev mmcblk1boot0, sector 64384 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 I/O error, dev mmcblk1boot1, sector 64384 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 I/O error, dev mmcblk1boot1, sector 64384 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 Buffer I/O error on dev mmcblk1boot1, logical block 8048, async page read I/O error, dev mmcblk1boot0, sector 64384 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 Buffer I/O error on dev mmcblk1boot0, logical block 8048, async page read Don't re-tune if auto retuning is supported in HW (when SDHCI_TUNING_MODE_3 is available). Link: https://lore.kernel.org/all/2e5f1997-564c-44e4-b357-6343e0dae7ab@smile.fr Fixes: f433e8aac6b9 ("mmc: sdhci-omap: Implement PM runtime functions") Signed-off-by: Romain Naour Reviewed-by: Tony Lindgren Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240315234444.816978-1-romain.naour@smile.fr Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-omap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 033be559a7309..bfb7c8b96341c 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1442,6 +1442,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + if (host->tuning_mode != SDHCI_TUNING_MODE_3) + mmc_retune_needed(host->mmc); + if (omap_host->con != -EINVAL) sdhci_runtime_suspend_host(host); -- GitLab From 547f4afaced0901d1d4c4bd64415dc1223f5fff8 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Wed, 13 Mar 2024 15:37:43 +0200 Subject: [PATCH 1404/2290] mmc: core: Initialize mmc_blk_ioc_data commit 0cdfe5b0bf295c0dee97436a8ed13336933a0211 upstream. Commit 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") adds flags uint to struct mmc_blk_ioc_data, but it does not get initialized for RPMB ioctls which now fails. Let's fix this by always initializing the struct and flags to zero. Fixes: 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218587 Link: https://lore.kernel.org/all/20231129092535.3278-1-avri.altman@wdc.com/ Cc: stable@vger.kernel.org Signed-off-by: Mikko Rapeli Reviewed-by: Avri Altman Acked-by: Adrian Hunter Tested-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240313133744.2405325-1-mikko.rapeli@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4688a658d6a6d..4aaaaca5ae7ad 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -415,7 +415,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( struct mmc_blk_ioc_data *idata; int err; - idata = kmalloc(sizeof(*idata), GFP_KERNEL); + idata = kzalloc(sizeof(*idata), GFP_KERNEL); if (!idata) { err = -ENOMEM; goto out; -- GitLab From ad9cc5e9e53ab94aa0c7ac65d43be7eb208dcb55 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Wed, 13 Mar 2024 15:37:44 +0200 Subject: [PATCH 1405/2290] mmc: core: Avoid negative index with array access commit cf55a7acd1ed38afe43bba1c8a0935b51d1dc014 upstream. Commit 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") assigns prev_idata = idatas[i - 1], but doesn't check that the iterator i is greater than zero. Let's fix this by adding a check. Fixes: 4d0c8d0aef63 ("mmc: core: Use mrq.sbc in close-ended ffu") Link: https://lore.kernel.org/all/20231129092535.3278-1-avri.altman@wdc.com/ Cc: stable@vger.kernel.org Signed-off-by: Mikko Rapeli Reviewed-by: Avri Altman Tested-by: Francesco Dolcini Link: https://lore.kernel.org/r/20240313133744.2405325-2-mikko.rapeli@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4aaaaca5ae7ad..657772546b6b1 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -490,7 +490,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->flags & MMC_BLK_IOC_DROP) return 0; - if (idata->flags & MMC_BLK_IOC_SBC) + if (idata->flags & MMC_BLK_IOC_SBC && i > 0) prev_idata = idatas[i - 1]; /* -- GitLab From 244cb8200e3a99ed3c34bb16e627d477bfac93ba Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 28 Mar 2024 09:43:40 +0900 Subject: [PATCH 1406/2290] block: Do not force full zone append completion in req_bio_endio() commit 55251fbdf0146c252ceff146a1bb145546f3e034 upstream. This reverts commit 748dc0b65ec2b4b7b3dbd7befcc4a54fdcac7988. Partial zone append completions cannot be supported as there is no guarantees that the fragmented data will be written sequentially in the same manner as with a full command. Commit 748dc0b65ec2 ("block: fix partial zone append completion handling in req_bio_endio()") changed req_bio_endio() to always advance a partially failed BIO by its full length, but this can lead to incorrect accounting. So revert this change and let low level device drivers handle this case by always failing completely zone append operations. With this revert, users will still see an IO error for a partially completed zone append BIO. Fixes: 748dc0b65ec2 ("block: fix partial zone append completion handling in req_bio_endio()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240328004409.594888-2-dlemoal@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 07610505c1776..e1b12f3d54bd4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -761,16 +761,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. - * For such case, force the completed nbytes to be equal to - * the BIO size so that bio_advance() sets the BIO remaining - * size to 0 and we end up calling bio_endio() before returning. */ - if (bio->bi_iter.bi_size != nbytes) { + if (bio->bi_iter.bi_size != nbytes) bio->bi_status = BLK_STS_IOERR; - nbytes = bio->bi_iter.bi_size; - } else { + else bio->bi_iter.bi_sector = rq->__sector; - } } bio_advance(bio, nbytes); -- GitLab From 6fc218ccd534d7246adb5b3cae6cafbc5dcf7a3c Mon Sep 17 00:00:00 2001 From: Ye Zhang Date: Thu, 21 Mar 2024 18:21:00 +0800 Subject: [PATCH 1407/2290] thermal: devfreq_cooling: Fix perf state when calculate dfc res_util MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a26de34b3c77ae3a969654d94be49e433c947e3b upstream. The issue occurs when the devfreq cooling device uses the EM power model and the get_real_power() callback is provided by the driver. The EM power table is sorted ascending,can't index the table by cooling device state,so convert cooling state to performance state by dfc->max_state - dfc->capped_state. Fixes: 615510fe13bd ("thermal: devfreq_cooling: remove old power model and use EM") Cc: 5.11+ # 5.11+ Signed-off-by: Ye Zhang Reviewed-by: Dhruva Gole Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/devfreq_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 24b474925cd68..0b424bc8cadfd 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -201,7 +201,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd res = dfc->power_ops->get_real_power(df, power, freq, voltage); if (!res) { - state = dfc->capped_state; + state = dfc->max_state - dfc->capped_state; /* Convert EM power into milli-Watts first */ dfc->res_util = dfc->em_pd->table[state].power; -- GitLab From 9acfd8b083a0ffbd387566800d89f55058a68af2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 6 Mar 2024 13:01:04 +0800 Subject: [PATCH 1408/2290] nouveau/dmem: handle kcalloc() allocation failure commit 16e87fe23d4af6df920406494ced5c0f4354567b upstream. The kcalloc() in nouveau_dmem_evict_chunk() will return null if the physical memory has run out. As a result, if we dereference src_pfns, dst_pfns or dma_addrs, the null pointer dereference bugs will happen. Moreover, the GPU is going away. If the kcalloc() fails, we could not evict all pages mapping a chunk. So this patch adds a __GFP_NOFAIL flag in kcalloc(). Finally, as there is no need to have physically contiguous memory, this patch switches kcalloc() to kvcalloc() in order to avoid failing allocations. CC: # v6.1 Fixes: 249881232e14 ("nouveau/dmem: evict device private memory during release") Suggested-by: Danilo Krummrich Signed-off-by: Duoming Zhou Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240306050104.11259-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 20fe53815b20f..6ca4a46a82ee9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -379,9 +379,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) dma_addr_t *dma_addrs; struct nouveau_fence *fence; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); - dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -407,11 +407,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) migrate_device_pages(src_pfns, dst_pfns, npages); nouveau_dmem_fence_done(&fence); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); for (i = 0; i < npages; i++) dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kfree(dma_addrs); + kvfree(dma_addrs); } void -- GitLab From 46efbdbc95a30951c2579caf97b6df2ee2b3bef3 Mon Sep 17 00:00:00 2001 From: Claus Hansen Ries Date: Thu, 21 Mar 2024 13:08:59 +0000 Subject: [PATCH 1409/2290] net: ll_temac: platform_get_resource replaced by wrong function commit 3a38a829c8bc27d78552c28e582eb1d885d07d11 upstream. The function platform_get_resource was replaced with devm_platform_ioremap_resource_byname and is called using 0 as name. This eventually ends up in platform_get_resource_byname in the call stack, where it causes a null pointer in strcmp. if (type == resource_type(r) && !strcmp(r->name, name)) It should have been replaced with devm_platform_ioremap_resource. Fixes: bd69058f50d5 ("net: ll_temac: Use devm_platform_ioremap_resource_byname()") Signed-off-by: Claus Hansen Ries Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cca18f9c630a41c18487729770b492bb@terma.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 6bf5e341c3c11..08c45756b2181 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1445,7 +1445,7 @@ static int temac_probe(struct platform_device *pdev) } /* map device registers */ - lp->regs = devm_platform_ioremap_resource_byname(pdev, 0); + lp->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); return -ENOMEM; -- GitLab From 016119154981d81c9e8f2ea3f56b9e2b4ea14500 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 12 Mar 2024 10:35:12 +0100 Subject: [PATCH 1410/2290] drm/vmwgfx: Create debugfs ttm_resource_manager entry only if needed commit 4be9075fec0a639384ed19975634b662bfab938f upstream. The driver creates /sys/kernel/debug/dri/0/mob_ttm even when the corresponding ttm_resource_manager is not allocated. This leads to a crash when trying to read from this file. Add a check to create mob_ttm, system_mob_ttm, and gmr_ttm debug file only when the corresponding ttm_resource_manager is allocated. crash> bt PID: 3133409 TASK: ffff8fe4834a5000 CPU: 3 COMMAND: "grep" #0 [ffffb954506b3b20] machine_kexec at ffffffffb2a6bec3 #1 [ffffb954506b3b78] __crash_kexec at ffffffffb2bb598a #2 [ffffb954506b3c38] crash_kexec at ffffffffb2bb68c1 #3 [ffffb954506b3c50] oops_end at ffffffffb2a2a9b1 #4 [ffffb954506b3c70] no_context at ffffffffb2a7e913 #5 [ffffb954506b3cc8] __bad_area_nosemaphore at ffffffffb2a7ec8c #6 [ffffb954506b3d10] do_page_fault at ffffffffb2a7f887 #7 [ffffb954506b3d40] page_fault at ffffffffb360116e [exception RIP: ttm_resource_manager_debug+0x11] RIP: ffffffffc04afd11 RSP: ffffb954506b3df0 RFLAGS: 00010246 RAX: ffff8fe41a6d1200 RBX: 0000000000000000 RCX: 0000000000000940 RDX: 0000000000000000 RSI: ffffffffc04b4338 RDI: 0000000000000000 RBP: ffffb954506b3e08 R8: ffff8fee3ffad000 R9: 0000000000000000 R10: ffff8fe41a76a000 R11: 0000000000000001 R12: 00000000ffffffff R13: 0000000000000001 R14: ffff8fe5bb6f3900 R15: ffff8fe41a6d1200 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffffb954506b3e00] ttm_resource_manager_show at ffffffffc04afde7 [ttm] #9 [ffffb954506b3e30] seq_read at ffffffffb2d8f9f3 RIP: 00007f4c4eda8985 RSP: 00007ffdbba9e9f8 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 000000000037e000 RCX: 00007f4c4eda8985 RDX: 000000000037e000 RSI: 00007f4c41573000 RDI: 0000000000000003 RBP: 000000000037e000 R8: 0000000000000000 R9: 000000000037fe30 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4c41573000 R13: 0000000000000003 R14: 00007f4c41572010 R15: 0000000000000003 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b Signed-off-by: Jocelyn Falempe Fixes: af4a25bbe5e7 ("drm/vmwgfx: Add debugfs entries for various ttm resource managers") Cc: Reviewed-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240312093551.196609-1-jfalempe@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b909a3ce9af3c..9d7a1b710f48f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1429,12 +1429,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw) root, "system_ttm"); ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM), root, "vram_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), - root, "gmr_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), - root, "mob_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), - root, "system_mob_ttm"); + if (vmw->has_gmr) + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), + root, "gmr_ttm"); + if (vmw->has_mob) { + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), + root, "mob_ttm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), + root, "system_mob_ttm"); + } } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, -- GitLab From b691954c94dbbe63c0aa9241cc516985aa59df05 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 20 Mar 2024 15:53:47 -0400 Subject: [PATCH 1411/2290] drm/amdkfd: fix TLB flush after unmap for GFX9.4.2 commit 1210e2f1033dc56b666c9f6dfb761a2d3f9f5d6c upstream. TLB flush after unmap accidentially was removed on gfx9.4.2. It is to add it back. Signed-off-by: Eric Huang Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ec8a576ac5a9e..3c7d267f2a07b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1349,7 +1349,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } -- GitLab From 72e4d3fb72e9f0f016946158a7d95304832768e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 19 Mar 2024 11:24:42 +0200 Subject: [PATCH 1412/2290] drm/i915/bios: Tolerate devdata==NULL in intel_bios_encoder_supports_dp_dual_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 32e39bab59934bfd3f37097d4dd85ac5eb0fd549 upstream. If we have no VBT, or the VBT didn't declare the encoder in question, we won't have the 'devdata' for the encoder. Instead of oopsing just bail early. We won't be able to tell whether the port is DP++ or not, but so be it. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10464 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240319092443.15769-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 26410896206342c8a80d2b027923e9ee7d33b733) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_bios.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index a70b7061742a8..9cc1ef2ca72cc 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3413,6 +3413,9 @@ static bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_enc { const struct child_device_config *child = &devdata->child; + if (!devdata) + return false; + if (!intel_bios_encoder_supports_dp(devdata) || !intel_bios_encoder_supports_hdmi(devdata)) return false; -- GitLab From 7eab7b021835ae422c38b968d5cc60e99408fb62 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Mar 2024 14:58:47 +0100 Subject: [PATCH 1413/2290] drm/i915/gt: Reset queue_priority_hint on parking commit 4a3859ea5240365d21f6053ee219bb240d520895 upstream. Originally, with strict in order execution, we could complete execution only when the queue was empty. Preempt-to-busy allows replacement of an active request that may complete before the preemption is processed by HW. If that happens, the request is retired from the queue, but the queue_priority_hint remains set, preventing direct submission until after the next CS interrupt is processed. This preempt-to-busy race can be triggered by the heartbeat, which will also act as the power-management barrier and upon completion allow us to idle the HW. We may process the completion of the heartbeat, and begin parking the engine before the CS event that restores the queue_priority_hint, causing us to fail the assertion that it is MIN. <3>[ 166.210729] __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 166.210781] Dumping ftrace buffer: <0>[ 166.210795] --------------------------------- ... <0>[ 167.302811] drm_fdin-1097 2..s1. 165741070us : trace_ports: 0000:00:02.0 rcs0: promote { ccid:20 1217:2 prio 0 } <0>[ 167.302861] drm_fdin-1097 2d.s2. 165741072us : execlists_submission_tasklet: 0000:00:02.0 rcs0: preempting last=1217:2, prio=0, hint=2147483646 <0>[ 167.302928] drm_fdin-1097 2d.s2. 165741072us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 1217:2, current 0 <0>[ 167.302992] drm_fdin-1097 2d.s2. 165741073us : __i915_request_submit: 0000:00:02.0 rcs0: fence 3:4660, current 4659 <0>[ 167.303044] drm_fdin-1097 2d.s1. 165741076us : execlists_submission_tasklet: 0000:00:02.0 rcs0: context:3 schedule-in, ccid:40 <0>[ 167.303095] drm_fdin-1097 2d.s1. 165741077us : trace_ports: 0000:00:02.0 rcs0: submit { ccid:40 3:4660* prio 2147483646 } <0>[ 167.303159] kworker/-89 11..... 165741139us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence c90:2, current 2 <0>[ 167.303208] kworker/-89 11..... 165741148us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:c90 unpin <0>[ 167.303272] kworker/-89 11..... 165741159us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 1217:2, current 2 <0>[ 167.303321] kworker/-89 11..... 165741166us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:1217 unpin <0>[ 167.303384] kworker/-89 11..... 165741170us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 3:4660, current 4660 <0>[ 167.303434] kworker/-89 11d..1. 165741172us : __intel_context_retire: 0000:00:02.0 rcs0: context:1216 retire runtime: { total:56028ns, avg:56028ns } <0>[ 167.303484] kworker/-89 11..... 165741198us : __engine_park: 0000:00:02.0 rcs0: parked <0>[ 167.303534] -0 5d.H3. 165741207us : execlists_irq_handler: 0000:00:02.0 rcs0: semaphore yield: 00000040 <0>[ 167.303583] kworker/-89 11..... 165741397us : __intel_context_retire: 0000:00:02.0 rcs0: context:1217 retire runtime: { total:325575ns, avg:0ns } <0>[ 167.303756] kworker/-89 11..... 165741777us : __intel_context_retire: 0000:00:02.0 rcs0: context:c90 retire runtime: { total:0ns, avg:0ns } <0>[ 167.303806] kworker/-89 11..... 165742017us : __engine_park: __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 167.303811] --------------------------------- <4>[ 167.304722] ------------[ cut here ]------------ <2>[ 167.304725] kernel BUG at drivers/gpu/drm/i915/gt/intel_engine_pm.c:283! <4>[ 167.304731] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI <4>[ 167.304734] CPU: 11 PID: 89 Comm: kworker/11:1 Tainted: G W 6.8.0-rc2-CI_DRM_14193-gc655e0fd2804+ #1 <4>[ 167.304736] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 <4>[ 167.304738] Workqueue: i915-unordered retire_work_handler [i915] <4>[ 167.304839] RIP: 0010:__engine_park+0x3fd/0x680 [i915] <4>[ 167.304937] Code: 00 48 c7 c2 b0 e5 86 a0 48 8d 3d 00 00 00 00 e8 79 48 d4 e0 bf 01 00 00 00 e8 ef 0a d4 e0 31 f6 bf 09 00 00 00 e8 03 49 c0 e0 <0f> 0b 0f 0b be 01 00 00 00 e8 f5 61 fd ff 31 c0 e9 34 fd ff ff 48 <4>[ 167.304940] RSP: 0018:ffffc9000059fce0 EFLAGS: 00010246 <4>[ 167.304942] RAX: 0000000000000200 RBX: 0000000000000000 RCX: 0000000000000006 <4>[ 167.304944] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 <4>[ 167.304946] RBP: ffff8881330ca1b0 R08: 0000000000000001 R09: 0000000000000001 <4>[ 167.304947] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881330ca000 <4>[ 167.304948] R13: ffff888110f02aa0 R14: ffff88812d1d0205 R15: ffff88811277d4f0 <4>[ 167.304950] FS: 0000000000000000(0000) GS:ffff88844f780000(0000) knlGS:0000000000000000 <4>[ 167.304952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 167.304953] CR2: 00007fc362200c40 CR3: 000000013306e003 CR4: 0000000000770ef0 <4>[ 167.304955] PKRU: 55555554 <4>[ 167.304957] Call Trace: <4>[ 167.304958] <4>[ 167.305573] ____intel_wakeref_put_last+0x1d/0x80 [i915] <4>[ 167.305685] i915_request_retire.part.0+0x34f/0x600 [i915] <4>[ 167.305800] retire_requests+0x51/0x80 [i915] <4>[ 167.305892] intel_gt_retire_requests_timeout+0x27f/0x700 [i915] <4>[ 167.305985] process_scheduled_works+0x2db/0x530 <4>[ 167.305990] worker_thread+0x18c/0x350 <4>[ 167.305993] kthread+0xfe/0x130 <4>[ 167.305997] ret_from_fork+0x2c/0x50 <4>[ 167.306001] ret_from_fork_asm+0x1b/0x30 <4>[ 167.306004] It is necessary for the queue_priority_hint to be lower than the next request submission upon waking up, as we rely on the hint to decide when to kick the tasklet to submit that first request. Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Closes: https://gitlab.freedesktop.org/drm/intel/issues/10154 Signed-off-by: Chris Wilson Signed-off-by: Janusz Krzysztofik Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240318135906.716055-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 98850e96cf811dc2d0a7d0af491caff9f5d49c1e) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee9..feb0fc32a19ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -253,9 +253,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index f903ee1ce06e7..eae138b9f2df3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3270,6 +3270,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) -- GitLab From fa2b938438cd217a9f937f0e12bf04470930a9ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 26 Mar 2024 12:43:17 -0400 Subject: [PATCH 1414/2290] Bluetooth: hci_sync: Fix not checking error on hci_cmd_sync_cancel_sync commit 1c3366abdbe884be62e5a7502b4db758aa3974c6 upstream. hci_cmd_sync_cancel_sync shall check the error passed to it since it will be propagated using req_result which is __u32 it needs to be properly set to a positive value if it was passed as negative othertise IS_ERR will not trigger as -(errno) would be converted to a positive value. Fixes: 63298d6e752f ("Bluetooth: hci_core: Cancel request on command timeout") Signed-off-by: Luiz Augusto von Dentz Reported-and-tested-by: Thorsten Leemhuis Closes: https://lore.kernel.org/all/08275279-7462-4f4a-a0ee-8aa015f829bc@leemhuis.info/ Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 6 +++--- net/bluetooth/hci_sync.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 70f24dc75b596..02e67ff05b7b4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2838,7 +2838,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); - hci_cmd_sync_cancel_sync(hdev, -err); + hci_cmd_sync_cancel_sync(hdev, err); } /* Suspend HCI device */ @@ -2858,7 +2858,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - hci_cancel_cmd_sync(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4169,7 +4169,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) err = hci_send_frame(hdev, skb); if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, err); + hci_cmd_sync_cancel_sync(hdev, -err); return; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 65b2ad34179f8..7e64cf880f9f1 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -678,7 +678,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; + /* req_result is __u32 so error must be positive to be properly + * propagated. + */ + hdev->req_result = err < 0 ? -err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); -- GitLab From 181f92abda617d406ceac7f9df1cc45152ca329b Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 14 Mar 2024 10:26:27 +0100 Subject: [PATCH 1415/2290] Revert "usb: phy: generic: Get the vbus supply" commit fdada0db0b2ae2addef4ccafe50937874dbeeebe upstream. This reverts commit 75fd6485cccef269ac9eb3b71cf56753341195ef. This patch was applied twice by accident, causing probe failures. Revert the accident. Signed-off-by: Alexander Stein Fixes: 75fd6485ccce ("usb: phy: generic: Get the vbus supply") Cc: stable Reviewed-by: Sean Anderson Link: https://lore.kernel.org/r/20240314092628.1869414-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 953df04b40d40..3dc5c04e7cbf9 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -265,13 +265,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); - if (PTR_ERR(nop->vbus_draw) == -ENODEV) - nop->vbus_draw = NULL; - if (IS_ERR(nop->vbus_draw)) - return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), - "could not get vbus regulator\n"); - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); if (PTR_ERR(nop->vbus_draw) == -ENODEV) nop->vbus_draw = NULL; -- GitLab From 916cd2fcbc1e344bcabf4b2a834cdf5a0417d30c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Mar 2024 12:50:48 +0100 Subject: [PATCH 1416/2290] usb: cdc-wdm: close race between read and workqueue commit 339f83612f3a569b194680768b22bf113c26a29d upstream. wdm_read() cannot race with itself. However, in service_outstanding_interrupt() it can race with the workqueue, which can be triggered by error handling. Hence we need to make sure that the WDM_RESPONDING flag is not just only set but tested. Fixes: afba937e540c9 ("USB: CDC WDM driver") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20240314115132.3907-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 1f0951be15ab7..fdc1a66b129a4 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -485,6 +485,7 @@ out_free_mem: static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; + int used; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) @@ -499,7 +500,10 @@ static int service_outstanding_interrupt(struct wdm_device *desc) goto out; } - set_bit(WDM_RESPONDING, &desc->flags); + used = test_and_set_bit(WDM_RESPONDING, &desc->flags); + if (used) + goto out; + spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); -- GitLab From c9006b90ba6876c87cffc70d604549ab5c51749a Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Thu, 7 Mar 2024 02:08:14 +0800 Subject: [PATCH 1417/2290] USB: UAS: return ENODEV when submit urbs fail with device not attached commit cd5432c712351a3d5f82512908f5febfca946ca6 upstream. In the scenario of entering hibernation with udisk in the system, if the udisk was gone or resume fail in the thaw phase of hibernation. Its state will be set to NOTATTACHED. At this point, usb_hub_wq was already freezed and can't not handle disconnect event. Next, in the poweroff phase of hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk when poweroff this scsi device, which will cause uas_submit_urbs to be called to submit URB for sense/data/cmd pipe. However, these URBs will submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead the SCSI layer go into an ugly loop and system fail to go into hibernation. On the other hand, when we specially check for -ENODEV in function uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device poweroff fail and system shutdown instead of entering hibernation. To fix this issue, let uas_submit_urbs to return original generic error when submitting URB failed. At the same time, we need to translate -ENODEV to DID_NOT_CONNECT for the SCSI layer. Suggested-by: Oliver Neukum Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Link: https://lore.kernel.org/r/20240306180814.4897-1-WeitaoWang-oc@zhaoxin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index ed22053b3252f..af619efe8eabf 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, * daft to me. */ -static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) +static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) { struct uas_dev_info *devinfo = cmnd->device->hostdata; struct urb *urb; @@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) urb = uas_alloc_sense_urb(devinfo, gfp, cmnd); if (!urb) - return NULL; + return -ENOMEM; usb_anchor_urb(urb, &devinfo->sense_urbs); err = usb_submit_urb(urb, gfp); if (err) { usb_unanchor_urb(urb); uas_log_cmd_state(cmnd, "sense submit err", err); usb_free_urb(urb); - return NULL; } - return urb; + return err; } static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_dev_info *devinfo) { struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); - struct urb *urb; int err; lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { - urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC); - if (!urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + err = uas_submit_sense_urb(cmnd, GFP_ATOMIC); + if (err) + return err; cmdinfo->state &= ~SUBMIT_STATUS_URB; } @@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_FROM_DEVICE); if (!cmdinfo->data_in_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_IN_URB; } @@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_in_urb); uas_log_cmd_state(cmnd, "data in submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_IN_URB; cmdinfo->state |= DATA_IN_URB_INFLIGHT; @@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_TO_DEVICE); if (!cmdinfo->data_out_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_OUT_URB; } @@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_out_urb); uas_log_cmd_state(cmnd, "data out submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_OUT_URB; cmdinfo->state |= DATA_OUT_URB_INFLIGHT; @@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (cmdinfo->state & ALLOC_CMD_URB) { cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd); if (!cmdinfo->cmd_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_CMD_URB; } @@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->cmd_urb); uas_log_cmd_state(cmnd, "cmd submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->cmd_urb = NULL; cmdinfo->state &= ~SUBMIT_CMD_URB; @@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd) * of queueing, no matter how fatal the error */ if (err == -ENODEV) { - set_host_byte(cmnd, DID_ERROR); + set_host_byte(cmnd, DID_NO_CONNECT); scsi_done(cmnd); goto zombie; } -- GitLab From aa1d1ce1ad6b5ec94f91c51c0e5c161444825875 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 15 Jul 2023 16:07:26 +0200 Subject: [PATCH 1418/2290] usb: dwc3-am62: Rename private data [ Upstream commit 3609699c32aa4f2710a6fe2b21afc6a9a3c66bc5 ] Rename dwc3_data to dwc3_am62 to make it consistent with other glue drivers, it's clearer that this is am62's specific. While there, do the same for data variable. Signed-off-by: Ladislav Michl Link: https://lore.kernel.org/r/ZLKoHhJvT+Y6aM+C@lenoch Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 6661befe4100 ("usb: dwc3-am62: fix module unload/reload behavior") Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-am62.c | 80 ++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index 173cf3579c55d..726f96d257c8d 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -89,7 +89,7 @@ #define DWC3_AM62_AUTOSUSPEND_DELAY 100 -struct dwc3_data { +struct dwc3_am62 { struct device *dev; void __iomem *usbss; struct clk *usb2_refclk; @@ -115,19 +115,19 @@ static const int dwc3_ti_rate_table[] = { /* in KHZ */ 52000, }; -static inline u32 dwc3_ti_readl(struct dwc3_data *data, u32 offset) +static inline u32 dwc3_ti_readl(struct dwc3_am62 *am62, u32 offset) { - return readl((data->usbss) + offset); + return readl((am62->usbss) + offset); } -static inline void dwc3_ti_writel(struct dwc3_data *data, u32 offset, u32 value) +static inline void dwc3_ti_writel(struct dwc3_am62 *am62, u32 offset, u32 value) { - writel(value, (data->usbss) + offset); + writel(value, (am62->usbss) + offset); } -static int phy_syscon_pll_refclk(struct dwc3_data *data) +static int phy_syscon_pll_refclk(struct dwc3_am62 *am62) { - struct device *dev = data->dev; + struct device *dev = am62->dev; struct device_node *node = dev->of_node; struct of_phandle_args args; struct regmap *syscon; @@ -139,16 +139,16 @@ static int phy_syscon_pll_refclk(struct dwc3_data *data) return PTR_ERR(syscon); } - data->syscon = syscon; + am62->syscon = syscon; ret = of_parse_phandle_with_fixed_args(node, "ti,syscon-phy-pll-refclk", 1, 0, &args); if (ret) return ret; - data->offset = args.args[0]; + am62->offset = args.args[0]; - ret = regmap_update_bits(data->syscon, data->offset, PHY_PLL_REFCLK_MASK, data->rate_code); + ret = regmap_update_bits(am62->syscon, am62->offset, PHY_PLL_REFCLK_MASK, am62->rate_code); if (ret) { dev_err(dev, "failed to set phy pll reference clock rate\n"); return ret; @@ -161,32 +161,32 @@ static int dwc3_ti_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = pdev->dev.of_node; - struct dwc3_data *data; + struct dwc3_am62 *am62; int i, ret; unsigned long rate; u32 reg; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); - if (!data) + am62 = devm_kzalloc(dev, sizeof(*am62), GFP_KERNEL); + if (!am62) return -ENOMEM; - data->dev = dev; - platform_set_drvdata(pdev, data); + am62->dev = dev; + platform_set_drvdata(pdev, am62); - data->usbss = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(data->usbss)) { + am62->usbss = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(am62->usbss)) { dev_err(dev, "can't map IOMEM resource\n"); - return PTR_ERR(data->usbss); + return PTR_ERR(am62->usbss); } - data->usb2_refclk = devm_clk_get(dev, "ref"); - if (IS_ERR(data->usb2_refclk)) { + am62->usb2_refclk = devm_clk_get(dev, "ref"); + if (IS_ERR(am62->usb2_refclk)) { dev_err(dev, "can't get usb2_refclk\n"); - return PTR_ERR(data->usb2_refclk); + return PTR_ERR(am62->usb2_refclk); } /* Calculate the rate code */ - rate = clk_get_rate(data->usb2_refclk); + rate = clk_get_rate(am62->usb2_refclk); rate /= 1000; // To KHz for (i = 0; i < ARRAY_SIZE(dwc3_ti_rate_table); i++) { if (dwc3_ti_rate_table[i] == rate) @@ -198,20 +198,20 @@ static int dwc3_ti_probe(struct platform_device *pdev) return -EINVAL; } - data->rate_code = i; + am62->rate_code = i; /* Read the syscon property and set the rate code */ - ret = phy_syscon_pll_refclk(data); + ret = phy_syscon_pll_refclk(am62); if (ret) return ret; /* VBUS divider select */ - data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); - reg = dwc3_ti_readl(data, USBSS_PHY_CONFIG); - if (data->vbus_divider) + am62->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); + reg = dwc3_ti_readl(am62, USBSS_PHY_CONFIG); + if (am62->vbus_divider) reg |= 1 << USBSS_PHY_VBUS_SEL_SHIFT; - dwc3_ti_writel(data, USBSS_PHY_CONFIG, reg); + dwc3_ti_writel(am62, USBSS_PHY_CONFIG, reg); pm_runtime_set_active(dev); pm_runtime_enable(dev); @@ -219,7 +219,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) * Don't ignore its dependencies with its children */ pm_suspend_ignore_children(dev, false); - clk_prepare_enable(data->usb2_refclk); + clk_prepare_enable(am62->usb2_refclk); pm_runtime_get_noresume(dev); ret = of_platform_populate(node, NULL, NULL, dev); @@ -229,9 +229,9 @@ static int dwc3_ti_probe(struct platform_device *pdev) } /* Set mode valid bit to indicate role is valid */ - reg = dwc3_ti_readl(data, USBSS_MODE_CONTROL); + reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); reg |= USBSS_MODE_VALID; - dwc3_ti_writel(data, USBSS_MODE_CONTROL, reg); + dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); /* Setting up autosuspend */ pm_runtime_set_autosuspend_delay(dev, DWC3_AM62_AUTOSUSPEND_DELAY); @@ -241,7 +241,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) return 0; err_pm_disable: - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); return ret; @@ -258,18 +258,18 @@ static int dwc3_ti_remove_core(struct device *dev, void *c) static int dwc3_ti_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct dwc3_data *data = platform_get_drvdata(pdev); + struct dwc3_am62 *am62 = platform_get_drvdata(pdev); u32 reg; device_for_each_child(dev, NULL, dwc3_ti_remove_core); /* Clear mode valid bit */ - reg = dwc3_ti_readl(data, USBSS_MODE_CONTROL); + reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); reg &= ~USBSS_MODE_VALID; - dwc3_ti_writel(data, USBSS_MODE_CONTROL, reg); + dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); pm_runtime_put_sync(dev); - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); @@ -280,18 +280,18 @@ static int dwc3_ti_remove(struct platform_device *pdev) #ifdef CONFIG_PM static int dwc3_ti_suspend_common(struct device *dev) { - struct dwc3_data *data = dev_get_drvdata(dev); + struct dwc3_am62 *am62 = dev_get_drvdata(dev); - clk_disable_unprepare(data->usb2_refclk); + clk_disable_unprepare(am62->usb2_refclk); return 0; } static int dwc3_ti_resume_common(struct device *dev) { - struct dwc3_data *data = dev_get_drvdata(dev); + struct dwc3_am62 *am62 = dev_get_drvdata(dev); - clk_prepare_enable(data->usb2_refclk); + clk_prepare_enable(am62->usb2_refclk); return 0; } -- GitLab From 6c6a45645a2e6a272dfde14eddbb6706de63c25d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 27 Feb 2024 11:23:48 +0200 Subject: [PATCH 1419/2290] usb: dwc3-am62: fix module unload/reload behavior [ Upstream commit 6661befe41009c210efa2c1bcd16a5cc4cff8a06 ] As runtime PM is enabled, the module can be runtime suspended when .remove() is called. Do a pm_runtime_get_sync() to make sure module is active before doing any register operations. Doing a pm_runtime_put_sync() should disable the refclk so no need to disable it again. Fixes the below warning at module removel. [ 39.705310] ------------[ cut here ]------------ [ 39.710004] clk:162:3 already disabled [ 39.713941] WARNING: CPU: 0 PID: 921 at drivers/clk/clk.c:1090 clk_core_disable+0xb0/0xb8 We called of_platform_populate() in .probe() so call the cleanup function of_platform_depopulate() in .remove(). Get rid of the now unnnecessary dwc3_ti_remove_core(). Without this, module re-load doesn't work properly. Fixes: e8784c0aec03 ("drivers: usb: dwc3: Add AM62 USB wrapper driver") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Roger Quadros Link: https://lore.kernel.org/r/20240227-for-v6-9-am62-usb-errata-3-0-v4-1-0ada8ddb0767@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-am62.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index 726f96d257c8d..ad8a2eadb472b 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -247,21 +247,14 @@ err_pm_disable: return ret; } -static int dwc3_ti_remove_core(struct device *dev, void *c) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - return 0; -} - static int dwc3_ti_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct dwc3_am62 *am62 = platform_get_drvdata(pdev); u32 reg; - device_for_each_child(dev, NULL, dwc3_ti_remove_core); + pm_runtime_get_sync(dev); + of_platform_depopulate(dev); /* Clear mode valid bit */ reg = dwc3_ti_readl(am62, USBSS_MODE_CONTROL); @@ -269,7 +262,6 @@ static int dwc3_ti_remove(struct platform_device *pdev) dwc3_ti_writel(am62, USBSS_MODE_CONTROL, reg); pm_runtime_put_sync(dev); - clk_disable_unprepare(am62->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); -- GitLab From 9d66ae0e7bb78b54e1e0525456c6b54e1d132046 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 26 Mar 2024 17:42:38 +0800 Subject: [PATCH 1420/2290] ALSA: sh: aica: reorder cleanup operations to avoid UAF bugs commit 051e0840ffa8ab25554d6b14b62c9ab9e4901457 upstream. The dreamcastcard->timer could schedule the spu_dma_work and the spu_dma_work could also arm the dreamcastcard->timer. When the snd_pcm_substream is closing, the aica_channel will be deallocated. But it could still be dereferenced in the worker thread. The reason is that del_timer() will return directly regardless of whether the timer handler is running or not and the worker could be rescheduled in the timer handler. As a result, the UAF bug will happen. The racy situation is shown below: (Thread 1) | (Thread 2) snd_aicapcm_pcm_close() | ... | run_spu_dma() //worker | mod_timer() flush_work() | del_timer() | aica_period_elapsed() //timer kfree(dreamcastcard->channel) | schedule_work() | run_spu_dma() //worker ... | dreamcastcard->channel-> //USE In order to mitigate this bug and other possible corner cases, call mod_timer() conditionally in run_spu_dma(), then implement PCM sync_stop op to cancel both the timer and worker. The sync_stop op will be called from PCM core appropriately when needed. Fixes: 198de43d758c ("[ALSA] Add ALSA support for the SEGA Dreamcast PCM device") Suggested-by: Takashi Iwai Signed-off-by: Duoming Zhou Message-ID: <20240326094238.95442-1-duoming@zju.edu.cn> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/sh/aica.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 6e9d6bd67369a..8b47bfcd90318 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work) dreamcastcard->clicks++; if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER)) dreamcastcard->clicks %= AICA_PERIOD_NUMBER; - mod_timer(&dreamcastcard->timer, jiffies + 1); + if (snd_pcm_running(dreamcastcard->substream)) + mod_timer(&dreamcastcard->timer, jiffies + 1); } } @@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t) /*timer function - so cannot sleep */ int play_period; struct snd_pcm_runtime *runtime; + if (!snd_pcm_running(substream)) + return; runtime = substream->runtime; dreamcastcard = substream->pcm->private_data; /* Have we played out an additional period? */ @@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream return 0; } +static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct snd_card_aica *dreamcastcard = substream->pcm->private_data; + + del_timer_sync(&dreamcastcard->timer); + cancel_work_sync(&dreamcastcard->spu_dma_work); + return 0; +} + static int snd_aicapcm_pcm_close(struct snd_pcm_substream *substream) { struct snd_card_aica *dreamcastcard = substream->pcm->private_data; - flush_work(&(dreamcastcard->spu_dma_work)); - del_timer(&dreamcastcard->timer); dreamcastcard->substream = NULL; kfree(dreamcastcard->channel); spu_disable(); @@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = { .prepare = snd_aicapcm_pcm_prepare, .trigger = snd_aicapcm_pcm_trigger, .pointer = snd_aicapcm_pcm_pointer, + .sync_stop = snd_aicapcm_pcm_sync_stop, }; /* TO DO: set up to handle more than one pcm instance */ -- GitLab From 3678cf67ff7136db1dd3bf63c361650db5d92889 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 13 Mar 2024 08:21:20 -0300 Subject: [PATCH 1421/2290] scsi: core: Fix unremoved procfs host directory regression commit f23a4d6e07570826fe95023ca1aa96a011fa9f84 upstream. Commit fc663711b944 ("scsi: core: Remove the /proc/scsi/${proc_name} directory earlier") fixed a bug related to modules loading/unloading, by adding a call to scsi_proc_hostdir_rm() on scsi_remove_host(). But that led to a potential duplicate call to the hostdir_rm() routine, since it's also called from scsi_host_dev_release(). That triggered a regression report, which was then fixed by commit be03df3d4bfe ("scsi: core: Fix a procfs host directory removal regression"). The fix just dropped the hostdir_rm() call from dev_release(). But it happens that this proc directory is created on scsi_host_alloc(), and that function "pairs" with scsi_host_dev_release(), while scsi_remove_host() pairs with scsi_add_host(). In other words, it seems the reason for removing the proc directory on dev_release() was meant to cover cases in which a SCSI host structure was allocated, but the call to scsi_add_host() didn't happen. And that pattern happens to exist in some error paths, for example. Syzkaller causes that by using USB raw gadget device, error'ing on usb-storage driver, at usb_stor_probe2(). By checking that path, we can see that the BadDevice label leads to a scsi_host_put() after a SCSI host allocation, but there's no call to scsi_add_host() in such path. That leads to messages like this in dmesg (and a leak of the SCSI host proc structure): usb-storage 4-1:87.51: USB Mass Storage device detected proc_dir_entry 'scsi/usb-storage' already registered WARNING: CPU: 1 PID: 3519 at fs/proc/generic.c:377 proc_register+0x347/0x4e0 fs/proc/generic.c:376 The proper fix seems to still call scsi_proc_hostdir_rm() on dev_release(), but guard that with the state check for SHOST_CREATED; there is even a comment in scsi_host_dev_release() detailing that: such conditional is meant for cases where the SCSI host was allocated but there was no calls to {add,remove}_host(), like the usb-storage case. This is what we propose here and with that, the error path of usb-storage does not trigger the warning anymore. Reported-by: syzbot+c645abf505ed21f931b5@syzkaller.appspotmail.com Fixes: be03df3d4bfe ("scsi: core: Fix a procfs host directory removal regression") Cc: stable@vger.kernel.org Cc: Bart Van Assche Cc: John Garry Cc: Shin'ichiro Kawasaki Signed-off-by: Guilherme G. Piccoli Link: https://lore.kernel.org/r/20240313113006.2834799-1-gpiccoli@igalia.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hosts.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8b825364baade..c785493b105c0 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev) if (shost->shost_state == SHOST_CREATED) { /* - * Free the shost_dev device name here if scsi_host_alloc() - * and scsi_host_put() have been called but neither + * Free the shost_dev device name and remove the proc host dir + * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_host_remove() has been called. * This avoids that the memory allocated for the shost_dev - * name is leaked. + * name as well as the proc dir structure are leaked. */ + scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } -- GitLab From 008bf3d622a9845f7f3363579b7f6851607996cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2024 17:36:56 +0100 Subject: [PATCH 1422/2290] staging: vc04_services: changen strncpy() to strscpy_pad() commit ef25725b7f8aaffd7756974d3246ec44fae0a5cf upstream. gcc-14 warns about this strncpy() that results in a non-terminated string for an overflow: In file included from include/linux/string.h:369, from drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c:20: In function 'strncpy', inlined from 'create_component' at drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c:940:2: include/linux/fortify-string.h:108:33: error: '__builtin_strncpy' specified bound 128 equals destination size [-Werror=stringop-truncation] Change it to strscpy_pad(), which produces a properly terminated and zero-padded string. Signed-off-by: Arnd Bergmann Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20240313163712.224585-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index cb921c94996a1..bda791ed8d5a4 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -938,8 +938,8 @@ static int create_component(struct vchiq_mmal_instance *instance, /* build component create message */ m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE; m.u.component_create.client_component = component->client_component; - strncpy(m.u.component_create.name, name, - sizeof(m.u.component_create.name)); + strscpy_pad(m.u.component_create.name, name, + sizeof(m.u.component_create.name)); ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), -- GitLab From 8416da2df7e0d5556da6f92e33ac9e498ca89aa3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Mar 2024 21:07:43 +0300 Subject: [PATCH 1423/2290] staging: vc04_services: fix information leak in create_component() commit f37e76abd614b68987abc8e5c22d986013349771 upstream. The m.u.component_create.pid field is for debugging and in the mainline kernel it's not used anything. However, it still needs to be set to something to prevent disclosing uninitialized stack data. Set it to zero. Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Cc: stable Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/2d972847-9ebd-481b-b6f9-af390f5aabd3@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index bda791ed8d5a4..90eb4c5936f38 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -940,6 +940,7 @@ static int create_component(struct vchiq_mmal_instance *instance, m.u.component_create.client_component = component->client_component; strscpy_pad(m.u.component_create.name, name, sizeof(m.u.component_create.name)); + m.u.component_create.pid = 0; ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), -- GitLab From cd20a6e83ba019ac91f3a8ed8a68d3385779a3d0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2024 13:04:50 -0400 Subject: [PATCH 1424/2290] USB: core: Add hub_get() and hub_put() routines commit ee113b860aa169e9a4d2c167c95d0f1961c6e1b8 upstream. Create hub_get() and hub_put() routines to encapsulate the kref_get() and kref_put() calls in hub.c. The new routines will be used by the next patch in this series. Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/604da420-ae8a-4a9e-91a4-2d511ff404fb@rowland.harvard.edu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 23 ++++++++++++++++------- drivers/usb/core/hub.h | 2 ++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d960a56b760ec..b1fb04e5247c3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -123,7 +123,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -685,14 +684,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1060,7 +1059,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1308,7 +1307,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1724,6 +1723,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1772,7 +1781,7 @@ static void hub_disconnect(struct usb_interface *intf) onboard_hub_destroy_pdevs(&hub->onboard_hub_devs); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -5874,7 +5883,7 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index bc66205ca52c3..1085c72335d5c 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -118,6 +118,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, -- GitLab From 9dac54f08198147f5ec0ec52fcf1bc8ac899ac05 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2024 13:06:33 -0400 Subject: [PATCH 1425/2290] USB: core: Fix deadlock in port "disable" sysfs attribute commit f4d1960764d8a70318b02f15203a1be2b2554ca1 upstream. The show and store callback routines for the "disable" sysfs attribute file in port.c acquire the device lock for the port's parent hub device. This can cause problems if another process has locked the hub to remove it or change its configuration: Removing the hub or changing its configuration requires the hub interface to be removed, which requires the port device to be removed, and device_del() waits until all outstanding sysfs attribute callbacks for the ports have returned. The lock can't be released until then. But the disable_show() or disable_store() routine can't return until after it has acquired the lock. The resulting deadlock can be avoided by calling sysfs_break_active_protection(). This will cause the sysfs core not to wait for the attribute's callback routine to return, allowing the removal to proceed. The disadvantage is that after making this call, there is no guarantee that the hub structure won't be deallocated at any moment. To prevent this, we have to acquire a reference to it first by calling hub_get(). Signed-off-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/f7a8c135-a495-4ce6-bd49-405a45e7ea9a@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 17aef216cb501..e91fa567d08d2 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -28,11 +28,22 @@ static ssize_t disable_show(struct device *dev, u16 portstatus, unused; bool disabled; int rc; + struct kernfs_node *kn; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -42,9 +53,13 @@ static ssize_t disable_show(struct device *dev, usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); if (rc) return rc; @@ -62,15 +77,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, int port1 = port_dev->portnum; bool disabled; int rc; + struct kernfs_node *kn; rc = strtobool(buf, &disabled); if (rc) return rc; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -91,9 +117,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, if (!rc) rc = count; -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); return rc; } -- GitLab From 3e284e15b7f05ed1e74ebcdc5d9db6b6e78fcb17 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 19 Mar 2024 16:12:09 +0900 Subject: [PATCH 1426/2290] scsi: sd: Fix TCG OPAL unlock on system resume commit 0c76106cb97548810214def8ee22700bbbb90543 upstream. Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") introduced the manage_system_start_stop scsi_device flag to allow libata to indicate to the SCSI disk driver that nothing should be done when resuming a disk on system resume. This change turned the execution of sd_resume() into a no-op for ATA devices on system resume. While this solved deadlock issues during device resume, this change also wrongly removed the execution of opal_unlock_from_suspend(). As a result, devices with TCG OPAL locking enabled remain locked and inaccessible after a system resume from sleep. To fix this issue, introduce the SCSI driver resume method and implement it with the sd_resume() function calling opal_unlock_from_suspend(). The former sd_resume() function is renamed to sd_resume_common() and modified to call the new sd_resume() function. For non-ATA devices, this result in no functional changes. In order for libata to explicitly execute sd_resume() when a device is resumed during system restart, the function scsi_resume_device() is introduced. libata calls this function from the revalidation work executed on devie resume, a state that is indicated with the new device flag ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked on resume, allowing normal operation. Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538 Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 5 ++++- drivers/ata/libata-scsi.c | 9 +++++++++ drivers/scsi/scsi_scan.c | 34 ++++++++++++++++++++++++++++++++++ drivers/scsi/sd.c | 25 +++++++++++++++++++++---- include/linux/libata.h | 1 + include/scsi/scsi_driver.h | 1 + include/scsi/scsi_host.h | 1 + 7 files changed, 71 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 1eaaf01418ea7..b8034d194078d 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -711,8 +711,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3089,6 +3091,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a9da2f05e6297..a09548630fc8b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4652,6 +4652,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4673,7 +4674,15 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index bab00b65bc9d1..852d509b19b2b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1611,6 +1611,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4433b02c8935f..c793bca882236 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -110,6 +110,7 @@ static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); static int sd_resume_system(struct device *); static int sd_resume_runtime(struct device *); +static int sd_resume(struct device *); static void sd_rescan(struct device *); static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt); @@ -691,6 +692,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, @@ -3830,7 +3832,22 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + if (sdkp->device->no_start_on_resume) + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; @@ -3849,7 +3866,7 @@ static int sd_resume(struct device *dev, bool runtime) } if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -3868,7 +3885,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -3892,7 +3909,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } /** diff --git a/include/linux/libata.h b/include/linux/libata.h index 45910aebc3778..6645259be1438 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -102,6 +102,7 @@ enum { ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba01..f40915d2eceef 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 71def41b1ad78..149b63e3534ad 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -752,6 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); -- GitLab From 4a8a42e16a5e8b376d7aa8a2d732f3e21512003c Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:21:21 +0000 Subject: [PATCH 1427/2290] usb: dwc2: host: Fix remote wakeup from hibernation commit bae2bc73a59c200db53b6c15fb26bb758e2c6108 upstream. Starting from core v4.30a changed order of programming GPWRDN_PMUACTV to 0 in case of exit from hibernation on remote wakeup signaling from device. Fixes: c5c403dc4336 ("usb: dwc2: Add host/device hibernation functions") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/99385ec55ce73445b6fbd0f471c9bd40eb1c9b9e.1708939799.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 1 + drivers/usb/dwc2/hcd.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 40cf2880d7e59..e18d9031c9953 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1084,6 +1084,7 @@ struct dwc2_hsotg { bool needs_byte_swap; /* DWC OTG HW Release versions */ +#define DWC2_CORE_REV_4_30a 0x4f54430a #define DWC2_CORE_REV_2_71a 0x4f54271a #define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 35c7a4df8e717..3b955b314199f 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5610,10 +5610,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, hr->hcfg, HCFG); /* De-assert Wakeup Logic */ - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn &= ~GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } hprt0 = hr->hprt0; hprt0 |= HPRT0_PWR; @@ -5638,6 +5640,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, hprt0 |= HPRT0_RES; dwc2_writel(hsotg, hprt0, HPRT0); + /* De-assert Wakeup Logic */ + if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } /* Wait for Resume time and then program HPRT again */ mdelay(100); hprt0 &= ~HPRT0_RES; -- GitLab From 3294928206812c1c6060bc27569566705eb034b6 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:21:11 +0000 Subject: [PATCH 1428/2290] usb: dwc2: host: Fix hibernation flow commit 3c7b9856a82227db01a20171d2e24c7ce305d59b upstream. Added to backup/restore registers HFLBADDR, HCCHARi, HCSPLTi, HCTSIZi, HCDMAi and HCDMABi. Fixes: 58e52ff6a6c3 ("usb: dwc2: Move register save and restore functions") Fixes: d17ee77b3044 ("usb: dwc2: add controller hibernation support") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/c2d10ee6098b9b009a8e94191e046004747d3bdd.1708945444.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 12 ++++++++++++ drivers/usb/dwc2/hcd.c | 18 ++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index e18d9031c9953..8c888b76abdf1 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -729,8 +729,14 @@ struct dwc2_dregs_backup { * struct dwc2_hregs_backup - Holds host registers state before * entering partial power down * @hcfg: Backup of HCFG register + * @hflbaddr: Backup of HFLBADDR register * @haintmsk: Backup of HAINTMSK register + * @hcchar: Backup of HCCHAR register + * @hcsplt: Backup of HCSPLT register * @hcintmsk: Backup of HCINTMSK register + * @hctsiz: Backup of HCTSIZ register + * @hdma: Backup of HCDMA register + * @hcdmab: Backup of HCDMAB register * @hprt0: Backup of HPTR0 register * @hfir: Backup of HFIR register * @hptxfsiz: Backup of HPTXFSIZ register @@ -738,8 +744,14 @@ struct dwc2_dregs_backup { */ struct dwc2_hregs_backup { u32 hcfg; + u32 hflbaddr; u32 haintmsk; + u32 hcchar[MAX_EPS_CHANNELS]; + u32 hcsplt[MAX_EPS_CHANNELS]; u32 hcintmsk[MAX_EPS_CHANNELS]; + u32 hctsiz[MAX_EPS_CHANNELS]; + u32 hcidma[MAX_EPS_CHANNELS]; + u32 hcidmab[MAX_EPS_CHANNELS]; u32 hprt0; u32 hfir; u32 hptxfsiz; diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 3b955b314199f..6de999c7513eb 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5406,9 +5406,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg) /* Backup Host regs */ hr = &hsotg->hr_backup; hr->hcfg = dwc2_readl(hsotg, HCFG); + hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR); hr->haintmsk = dwc2_readl(hsotg, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i)); + hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i)); hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i)); + hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i)); + hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i)); + hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i)); + } hr->hprt0 = dwc2_read_hprt0(hsotg); hr->hfir = dwc2_readl(hsotg, HFIR); @@ -5442,10 +5449,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) hr->valid = false; dwc2_writel(hsotg, hr->hcfg, HCFG); + dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR); dwc2_writel(hsotg, hr->haintmsk, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i)); + dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i)); dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i)); + dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i)); + dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i)); + dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i)); + } dwc2_writel(hsotg, hr->hprt0, HPRT0); dwc2_writel(hsotg, hr->hfir, HFIR); -- GitLab From bc48eb1b53ce977d17d51caa574bd81064a117a2 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:21:32 +0000 Subject: [PATCH 1429/2290] usb: dwc2: host: Fix ISOC flow in DDMA mode commit b258e42688501cadb1a6dd658d6f015df9f32d8f upstream. Fixed ISOC completion flow in DDMA mode. Added isoc descriptor actual length value and update urb's start_frame value. Fixed initialization of ISOC DMA descriptors flow. Fixes: 56f5b1cff22a ("staging: Core files for the DWC2 driver") Fixes: 20f2eb9c4cf8 ("staging: dwc2: add microframe scheduler from downstream Pi kernel") Fixes: c17b337c1ea4 ("usb: dwc2: host: program descriptor for next frame") Fixes: dc4c76e7b22c ("staging: HCD descriptor DMA support for the DWC2 driver") Fixes: 762d3a1a9cd7 ("usb: dwc2: host: process all completed urbs") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/a8b1e1711cc6cabfb45d92ede12e35445c66f06c.1708944698.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd.c | 12 ++++++++++-- drivers/usb/dwc2/hcd_ddma.c | 17 +++++++++++------ drivers/usb/dwc2/hw.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 6de999c7513eb..70b389125f635 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2701,8 +2701,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry); - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the periodic ready schedule to the @@ -2735,8 +2738,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the non-periodic inactive schedule to the @@ -4143,6 +4149,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd, urb->actual_length); if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + if (!hsotg->params.dma_desc_enable) + urb->start_frame = qtd->qh->start_active_frame; urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb); for (i = 0; i < urb->number_of_packets; ++i) { urb->iso_frame_desc[i].actual_length = diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 6b4d825e97a2d..79582b102c7ed 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg, idx = qh->td_last; inc = qh->host_interval; hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg); - cur_idx = dwc2_frame_list_idx(hsotg->frame_number); + cur_idx = idx; next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed); /* @@ -866,6 +866,8 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, { struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; + u16 frame_desc_idx; + struct urb *usb_urb = qtd->urb->priv; u16 remain = 0; int rc = 0; @@ -878,8 +880,11 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, DMA_FROM_DEVICE); dma_desc = &qh->desc_list[idx]; + frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1); - frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last]; + frame_desc = &qtd->urb->iso_descs[frame_desc_idx]; + if (idx == qtd->isoc_td_first) + usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg); dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset); if (chan->ep_is_in) remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >> @@ -900,7 +905,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, frame_desc->status = 0; } - if (++qtd->isoc_frame_index == qtd->urb->packet_count) { + if (++qtd->isoc_frame_index == usb_urb->number_of_packets) { /* * urb->status is not used for isoc transfers here. The * individual frame_desc status are used instead. @@ -1005,11 +1010,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg, return; idx = dwc2_desclist_idx_inc(idx, qh->host_interval, chan->speed); - if (!rc) + if (rc == 0) continue; - if (rc == DWC2_CMPL_DONE) - break; + if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP) + goto stop_scan; /* rc == DWC2_CMPL_STOP */ diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index 13abdd5f67529..12f8c7f86dc98 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -698,7 +698,7 @@ #define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) #define TXSTS_QTOP_TOKEN_SHIFT 25 #define TXSTS_QTOP_TERMINATE BIT(24) -#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_MASK (0x7f << 16) #define TXSTS_QSPCAVAIL_SHIFT 16 #define TXSTS_FSPCAVAIL_MASK (0xffff << 0) #define TXSTS_FSPCAVAIL_SHIFT 0 -- GitLab From 74cdf12f8dd8d7e9d9b2cf605ed136df7c48a659 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:22:01 +0000 Subject: [PATCH 1430/2290] usb: dwc2: gadget: Fix exiting from clock gating commit 31f42da31417bec88158f3cf62d19db836217f1e upstream. Added exiting from the clock gating mode on USB Reset Detect interrupt if core in the clock gating mode. Added new condition to check core in clock gating mode or no. Fixes: 9b4965d77e11 ("usb: dwc2: Add exit clock gating from session request interrupt") Fixes: 5d240efddc7f ("usb: dwc2: Add exit clock gating from wakeup interrupt") Fixes: 16c729f90bdf ("usb: dwc2: Allow exit clock gating in urb enqueue") Fixes: 401411bbc4e6 ("usb: dwc2: Add exit clock gating before removing driver") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/cbcc2ccd37e89e339130797ed68ae4597db773ac.1708938774.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core_intr.c | 9 ++++++--- drivers/usb/dwc2/gadget.c | 6 ++++++ drivers/usb/dwc2/hcd.c | 2 +- drivers/usb/dwc2/platform.c | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 158ede7538548..f8426e3d2b19b 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } @@ -408,7 +409,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } else { /* Change to L0 state */ @@ -425,7 +427,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) } if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_host_exit_clock_gating(hsotg, 1); /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 8b15742d9e8aa..56157b91f5050 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3728,6 +3728,12 @@ irq_retry: if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2) dwc2_exit_partial_power_down(hsotg, 0, true); + /* Exit gadget mode clock gating. */ + if (hsotg->params.power_down == + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + hsotg->lx_state = DWC2_L0; } diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 70b389125f635..dd5b1c5691e11 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -4657,7 +4657,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 58f53faab340f..2e4c6884f36a4 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -298,7 +298,7 @@ static int dwc2_driver_remove(struct platform_device *dev) /* Exit clock gating when driver is removed. */ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else -- GitLab From 19ca7ef7d83974574b0edc8894607d1544f157d6 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 13 Mar 2024 09:22:13 +0000 Subject: [PATCH 1431/2290] usb: dwc2: gadget: LPM flow fix commit 5d69a3b54e5a630c90d82a4c2bdce3d53dc78710 upstream. Added functionality to exit from L1 state by device initiation using remote wakeup signaling, in case when function driver queuing request while core in L1 state. Fixes: 273d576c4d41 ("usb: dwc2: gadget: Add functionality to exit from LPM L1 state") Fixes: 88b02f2cb1e1 ("usb: dwc2: Add core state checking") CC: stable@vger.kernel.org Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/b4d9de5382375dddbf7ef6049d9a82066ad87d5d.1710166393.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 1 + drivers/usb/dwc2/core_intr.c | 63 ++++++++++++++++++++++++------------ drivers/usb/dwc2/gadget.c | 4 +++ 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 8c888b76abdf1..b106c0e0b77ba 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1334,6 +1334,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg); void dwc2_enable_acg(struct dwc2_hsotg *hsotg); +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup); /* This function should be called on every hardware interrupt. */ irqreturn_t dwc2_handle_common_intr(int irq, void *dev); diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index f8426e3d2b19b..26d752a4c3ca9 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -323,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * @hsotg: Programming view of DWC_otg controller * */ -static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup) { u32 glpmcfg; - u32 i = 0; + u32 pcgctl; + u32 dctl; if (hsotg->lx_state != DWC2_L1) { dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n"); @@ -335,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) glpmcfg = dwc2_readl(hsotg, GLPMCFG); if (dwc2_is_device_mode(hsotg)) { - dev_dbg(hsotg->dev, "Exit from L1 state\n"); + dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup); glpmcfg &= ~GLPMCFG_ENBLSLPM; - glpmcfg &= ~GLPMCFG_HIRD_THRES_EN; + glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK; dwc2_writel(hsotg, glpmcfg, GLPMCFG); - do { - glpmcfg = dwc2_readl(hsotg, GLPMCFG); + pcgctl = dwc2_readl(hsotg, PCGCTL); + pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING; + dwc2_writel(hsotg, pcgctl, PCGCTL); - if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK | - GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS))) - break; + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_ENBESL) { + glpmcfg |= GLPMCFG_RSTRSLPSTS; + dwc2_writel(hsotg, glpmcfg, GLPMCFG); + } + + if (remotewakeup) { + if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__); + goto fail; + return; + } + + dctl = dwc2_readl(hsotg, DCTL); + dctl |= DCTL_RMTWKUPSIG; + dwc2_writel(hsotg, dctl, DCTL); - udelay(1); - } while (++i < 200); + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__); + goto fail; + return; + } + } - if (i == 200) { - dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n"); + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS || + glpmcfg & GLPMCFG_L1RESUMEOK) { + goto fail; return; } - dwc2_gadget_init_lpm(hsotg); + + /* Inform gadget to exit from L1 */ + call_gadget(hsotg, resume); + /* Change to L0 state */ + hsotg->lx_state = DWC2_L0; + hsotg->bus_suspended = false; +fail: dwc2_gadget_init_lpm(hsotg); } else { /* TODO */ dev_err(hsotg->dev, "Host side LPM is not supported.\n"); return; } - - /* Change to L0 state */ - hsotg->lx_state = DWC2_L0; - - /* Inform gadget to exit from L1 */ - call_gadget(hsotg, resume); } /* @@ -386,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state); if (hsotg->lx_state == DWC2_L1) { - dwc2_wakeup_from_lpm_l1(hsotg); + dwc2_wakeup_from_lpm_l1(hsotg, false); return; } diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 56157b91f5050..cb29f9fae2f23 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1416,6 +1416,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, ep->name, req, req->length, req->buf, req->no_interrupt, req->zero, req->short_not_ok); + if (hs->lx_state == DWC2_L1) { + dwc2_wakeup_from_lpm_l1(hs, true); + } + /* Prevent new request submission when controller is suspended */ if (hs->lx_state != DWC2_L0) { dev_dbg(hs->dev, "%s: submit request only in active state\n", -- GitLab From f74c5e0b54b02706d9a862ac6cddade30ac86bcf Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Fri, 15 Mar 2024 10:01:44 +0800 Subject: [PATCH 1432/2290] usb: udc: remove warning when queue disabled ep commit 2a587a035214fa1b5ef598aea0b81848c5b72e5e upstream. It is possible trigger below warning message from mass storage function, WARNING: CPU: 6 PID: 3839 at drivers/usb/gadget/udc/core.c:294 usb_ep_queue+0x7c/0x104 pc : usb_ep_queue+0x7c/0x104 lr : fsg_main_thread+0x494/0x1b3c Root cause is mass storage function try to queue request from main thread, but other thread may already disable ep when function disable. As there is no function failure in the driver, in order to avoid effort to fix warning, change WARN_ON_ONCE() in usb_ep_queue() to pr_debug(). Suggested-by: Alan Stern Cc: stable@vger.kernel.org Signed-off-by: yuan linyu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240315020144.2715575-1-yuanlinyu@hihonor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 0edd9e53fc5a1..82a10774a7ebc 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep, { int ret = 0; - if (WARN_ON_ONCE(!ep->enabled && ep->address)) { + if (!ep->enabled && ep->address) { + pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", + ep->address, ep->name); ret = -ESHUTDOWN; goto out; } -- GitLab From 2d28af770d4d19e440f56732d8895d30bef07fb0 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Tue, 19 Mar 2024 15:43:09 +0800 Subject: [PATCH 1433/2290] usb: typec: Return size of buffer if pd_set operation succeeds commit 53f5094fdf5deacd99b8655df692e9278506724d upstream. The attribute writing should return the number of bytes used from the buffer on success. Fixes: a7cff92f0635 ("usb: typec: USB Power Delivery helpers for ports and partners") Cc: stable@vger.kernel.org Signed-off-by: Kyle Tso Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240319074309.3306579-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 3da404d5178d3..ce83f558fe447 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1245,6 +1245,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, { struct typec_port *port = to_typec_port(dev); struct usb_power_delivery *pd; + int ret; if (!port->ops || !port->ops->pd_set) return -EOPNOTSUPP; @@ -1253,7 +1254,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, if (!pd) return -EINVAL; - return port->ops->pd_set(port, pd); + ret = port->ops->pd_set(port, pd); + if (ret) + return ret; + + return size; } static ssize_t select_usb_power_delivery_show(struct device *dev, -- GitLab From 959aacfe3ab655f537b861a194740a5212c40b35 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:22 +0100 Subject: [PATCH 1434/2290] usb: typec: ucsi: Clear EVENT_PENDING under PPM lock commit 15b2e71b4653b3e13df34695a29ebeee237c5af2 upstream. Suppose we sleep on the PPM lock after clearing the EVENT_PENDING bit because the thread for another connector is executing a command. In this case the command completion of the other command will still report the connector change for our connector. Clear the EVENT_PENDING bit under the PPM lock to avoid another useless call to ucsi_handle_connector_change() in this case. Fixes: c9aed03a0a68 ("usb: ucsi: Add missing ppm_lock") Cc: stable Signed-off-by: Christian A. Ehrhardt Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-2-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0695ee54ff781..1a3801991cd1c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -829,11 +829,11 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) ucsi_partner_task(con, ucsi_check_altmodes, 1, 0); - clear_bit(EVENT_PENDING, &con->ucsi->flags); - mutex_lock(&ucsi->ppm_lock); + clear_bit(EVENT_PENDING, &con->ucsi->flags); ret = ucsi_acknowledge_connector_change(ucsi); mutex_unlock(&ucsi->ppm_lock); + if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); -- GitLab From f8704d54c8e70b616fc6a8af9ee07d1dfd982746 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:24 +0100 Subject: [PATCH 1435/2290] usb: typec: ucsi: Ack unsupported commands commit 6b5c85ddeea77d18c4b69e3bda60e9374a20c304 upstream. If a command completes the OPM must send an ack. This applies to unsupported commands, too. Send the required ACK for unsupported commands. Signed-off-by: Christian A. Ehrhardt Cc: stable Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 1a3801991cd1c..57ba7c61e5af6 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -138,8 +138,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; - if (cci & UCSI_CCI_NOT_SUPPORTED) + if (cci & UCSI_CCI_NOT_SUPPORTED) { + if (ucsi_acknowledge_command(ucsi) < 0) + dev_err(ucsi->dev, + "ACK of unsupported command failed\n"); return -EOPNOTSUPP; + } if (cci & UCSI_CCI_ERROR) { if (cmd == UCSI_GET_ERROR_STATUS) -- GitLab From 12d0306cecf1a995619445117b3121617495eb75 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:25 +0100 Subject: [PATCH 1436/2290] usb: typec: ucsi_acpi: Refactor and fix DELL quirk commit 6aaceb7d9cd00f3e065dc4b054ecfe52c5253b03 upstream. Some DELL systems don't like UCSI_ACK_CC_CI commands with the UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE bit set. The current quirk still leaves room for races because it requires two consecutive ACK commands to be sent. Refactor and significantly simplify the quirk to fix this: Send a dummy command and bundle the connector change ack with the command completion ack in a single UCSI_ACK_CC_CI command. This removes the need to probe for the quirk. While there define flag bits for struct ucsi_acpi->flags in ucsi_acpi.c and don't re-use definitions from ucsi.h for struct ucsi->flags. Fixes: f3be347ea42d ("usb: ucsi_acpi: Quirk to ack a connector change ack cmd") Cc: stable@vger.kernel.org Signed-off-by: Christian A. Ehrhardt Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-5-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 75 +++++++++++++----------------- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 48130d636a020..b4d86d47c5db4 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -23,10 +23,11 @@ struct ucsi_acpi { void *base; struct completion complete; unsigned long flags; +#define UCSI_ACPI_SUPPRESS_EVENT 0 +#define UCSI_ACPI_COMMAND_PENDING 1 +#define UCSI_ACPI_ACK_PENDING 2 guid_t guid; u64 cmd; - bool dell_quirk_probed; - bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -79,9 +80,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, int ret; if (ack) - set_bit(ACK_PENDING, &ua->flags); + set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - set_bit(COMMAND_PENDING, &ua->flags); + set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -92,9 +93,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, out_clear_bit: if (ack) - clear_bit(ACK_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - clear_bit(COMMAND_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); return ret; } @@ -129,51 +130,40 @@ static const struct ucsi_operations ucsi_zenbook_ops = { }; /* - * Some Dell laptops expect that an ACK command with the - * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) - * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. - * If this is not done events are not delivered to OSPM and - * subsequent commands will timeout. + * Some Dell laptops don't like ACK commands with the + * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE + * bit set. To work around this send a dummy command and bundle the + * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE + * for the dummy command. */ static int ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - u64 cmd = *(u64 *)val, ack = 0; + u64 cmd = *(u64 *)val; + u64 dummycmd = UCSI_GET_CAPABILITY; int ret; - if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && - cmd & UCSI_ACK_CONNECTOR_CHANGE) - ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; - - ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); - if (ret != 0) - return ret; - if (ack == 0) - return ret; - - if (!ua->dell_quirk_probed) { - ua->dell_quirk_probed = true; - - cmd = UCSI_GET_CAPABILITY; - ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, - sizeof(cmd)); - if (ret == 0) - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, - &ack, sizeof(ack)); - if (ret != -ETIMEDOUT) + if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) { + cmd |= UCSI_ACK_COMMAND_COMPLETE; + + /* + * The UCSI core thinks it is sending a connector change ack + * and will accept new connector change events. We don't want + * this to happen for the dummy command as its response will + * still report the very event that the core is trying to clear. + */ + set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd, + sizeof(dummycmd)); + clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + + if (ret < 0) return ret; - - ua->dell_quirk_active = true; - dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); - dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); } - if (!ua->dell_quirk_active) - return ret; - - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); } static const struct ucsi_operations ucsi_dell_ops = { @@ -209,13 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (ret) return; - if (UCSI_CCI_CONNECTOR(cci)) + if (UCSI_CCI_CONNECTOR(cci) && + !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) complete(&ua->complete); if (cci & UCSI_CCI_COMMAND_COMPLETE && - test_bit(COMMAND_PENDING, &ua->flags)) + test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } -- GitLab From c223bc352cd34be66b59dfa8b62ba02ec4b78792 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:26 +0100 Subject: [PATCH 1437/2290] usb: typec: ucsi: Clear UCSI_CCI_RESET_COMPLETE before reset commit 3de4f996a0b5412aa451729008130a488f71563e upstream. Check the UCSI_CCI_RESET_COMPLETE complete flag before starting another reset. Use a UCSI_SET_NOTIFICATION_ENABLE command to clear the flag if it is set. Signed-off-by: Christian A. Ehrhardt Cc: stable Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-6-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 57ba7c61e5af6..98f335cbbcdea 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -878,13 +878,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard) static int ucsi_reset_ppm(struct ucsi *ucsi) { - u64 command = UCSI_PPM_RESET; + u64 command; unsigned long tmo; u32 cci; int ret; mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret < 0) + goto out; + + /* + * If UCSI_CCI_RESET_COMPLETE is already set we must clear + * the flag before we start another reset. Send a + * UCSI_SET_NOTIFICATION_ENABLE command to achieve this. + * Ignore a timeout and try the reset anyway if this fails. + */ + if (cci & UCSI_CCI_RESET_COMPLETE) { + command = UCSI_SET_NOTIFICATION_ENABLE; + ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, + sizeof(command)); + if (ret < 0) + goto out; + + tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); + do { + ret = ucsi->ops->read(ucsi, UCSI_CCI, + &cci, sizeof(cci)); + if (ret < 0) + goto out; + if (cci & UCSI_CCI_COMMAND_COMPLETE) + break; + if (time_is_before_jiffies(tmo)) + break; + msleep(20); + } while (1); + + WARN_ON(cci & UCSI_CCI_RESET_COMPLETE); + } + + command = UCSI_PPM_RESET; ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, sizeof(command)); if (ret < 0) -- GitLab From 3b9d72442adfbc9ddb0f76dd1b03977b3a578b16 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:17 +0530 Subject: [PATCH 1438/2290] scsi: qla2xxx: Prevent command send on chip reset commit 4895009c4bb72f71f2e682f1e7d2c2d96e482087 upstream. Currently IOCBs are allowed to push through while chip reset could be in progress. During chip reset the outstanding_cmds array is cleared twice. Once when any command on this array is returned as failed and secondly when the array is initialize to zero. If a command is inserted on to the array between these intervals, then the command will be lost. Check for chip reset before sending IOCB. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 8 ++++++-- drivers/scsi/qla2xxx/qla_iocb.c | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 884ed77259f85..f891f7e05d202 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1187,8 +1187,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* + * use qla24xx_async_gnl_sp_done to purge all pending gnl request. + * kref_put is call behind the scene. + */ + sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR; + qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR); fcport->flags &= ~(FCF_ASYNC_SENT); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9e524d52dc862..8b097cec830d7 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2588,6 +2588,33 @@ void qla2x00_sp_release(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct scsi_qla_host *vha = sp->vha; + + switch (sp->type) { + case SRB_CT_PTHRU_CMD: + /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */ + if (sp->u.iocb_cmd.u.ctarg.req && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + break; + default: + break; + } sp->free(sp); } @@ -2693,7 +2720,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { - kfree(fcport); + qla2x00_free_fcport(fcport); ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); return -ENOMEM; @@ -2748,6 +2775,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2757,6 +2785,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b.area, fcport->d_id.b.al_pa); wait_for_completion(&elsio->u.els_logo.comp); + qla2x00_free_fcport(fcport); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); @@ -3916,7 +3945,7 @@ qla2x00_start_sp(srb_t *sp) return -EAGAIN; } - pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp); if (!pkt) { rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, -- GitLab From 3cd58084e48ef5d6ed9d25714b5f9e563b846b04 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:18 +0530 Subject: [PATCH 1439/2290] scsi: qla2xxx: Fix N2N stuck connection commit 881eb861ca3877300570db10abbf11494e48548d upstream. Disk failed to rediscover after chip reset error injection. The chip reset happens at the time when a PLOGI is being sent. This causes a flag to be left on which blocks the retry. Clear the blocking flag. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_gbl.h | 2 +- drivers/scsi/qla2xxx/qla_iocb.c | 32 +++++++++++--------------------- drivers/scsi/qla2xxx/qla_os.c | 2 +- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 2e4537f9e5b50..73cd869caf609 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *); extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *); extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t); -extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool); +extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *); extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 8b097cec830d7..97b98ca09acb1 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3042,7 +3042,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, - fc_port_t *fcport, bool wait) + fc_port_t *fcport) { srb_t *sp; struct srb_iocb *elsio = NULL; @@ -3057,8 +3057,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!sp) { ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); - fcport->flags &= ~FCF_ASYNC_ACTIVE; - return -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -3067,9 +3066,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - if (wait) - sp->flags = SRB_WAKEUP_ON_COMP; - sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; @@ -3085,7 +3081,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_plogi_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } resp_ptr = elsio->u.els_plogi.els_resp_pyld = @@ -3094,7 +3090,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_resp_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr); @@ -3109,7 +3105,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) { struct fc_els_flogi *p = ptr; - p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC); } @@ -3118,10 +3113,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, sizeof(*elsio->u.els_plogi.els_plogi_pyld)); - init_completion(&elsio->u.els_plogi.comp); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_LOGIN_NEEDED; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + goto done_free_sp; } else { ql_dbg(ql_dbg_disc, vha, 0x3074, "%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n", @@ -3129,21 +3125,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b24, vha->d_id.b24); } - if (wait) { - wait_for_completion(&elsio->u.els_plogi.comp); - - if (elsio->u.els_plogi.comp_status != CS_COMPLETE) - rval = QLA_FUNCTION_FAILED; - } else { - goto done; - } + return rval; -out: - fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); +done_free_sp: qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); done: + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + qla2x00_set_fcport_disc_state(fcport, DSC_DELETED); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 25ca0544b9639..25d0c2bfdd742 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5562,7 +5562,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) break; case QLA_EVT_ELS_PLOGI: qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI, - e->u.fcport.fcport, false); + e->u.fcport.fcport); break; case QLA_EVT_SA_REPLACE: rc = qla24xx_issue_sa_replace_iocb(vha, e); -- GitLab From 14a3ca35c523fc8de16d72c1d93d37234cdf5962 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:19 +0530 Subject: [PATCH 1440/2290] scsi: qla2xxx: Split FCE|EFT trace control commit 76a192e1a566e15365704b9f8fb3b70825f85064 upstream. Current code combines the allocation of FCE|EFT trace buffers and enables the features all in 1 step. Split this step into separate steps in preparation for follow-on patch to allow user to have a choice to enable / disable FCE trace feature. Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-4-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 102 +++++++++++++------------------- 1 file changed, 41 insertions(+), 61 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f891f7e05d202..f0a11d42e1fd4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2670,6 +2670,40 @@ exit: return rval; } +static void qla_enable_fce_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->fce) { + ha->flags.fce_enabled = 1; + memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); + rval = qla2x00_enable_fce_trace(vha, + ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8033, + "Unable to reinitialize FCE (%d).\n", rval); + ha->flags.fce_enabled = 0; + } + } +} + +static void qla_enable_eft_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->eft) { + memset(ha->eft, 0, EFT_SIZE); + rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8034, + "Unable to reinitialize EFT (%d).\n", rval); + } + } +} /* * qla2x00_initialize_adapter * Initialize board. @@ -3673,9 +3707,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) } static void -qla2x00_init_fce_trace(scsi_qla_host_t *vha) +qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3704,27 +3737,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, - ha->fce_mb, &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x00bf, - "Unable to initialize FCE (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c0, "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); - ha->flags.fce_enabled = 1; ha->fce_dma = tc_dma; ha->fce = tc; + ha->fce_bufs = FCE_NUM_BUFFERS; } static void -qla2x00_init_eft_trace(scsi_qla_host_t *vha) +qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3749,14 +3772,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x00c2, - "Unable to initialize EFT (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c3, "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); @@ -3764,13 +3779,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) ha->eft = tc; } -static void -qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) -{ - qla2x00_init_fce_trace(vha); - qla2x00_init_eft_trace(vha); -} - void qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) { @@ -3825,10 +3833,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); - qla2x00_init_fce_trace(vha); + qla2x00_alloc_fce_trace(vha); if (ha->fce) fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; - qla2x00_init_eft_trace(vha); + qla2x00_alloc_eft_trace(vha); if (ha->eft) eft_size = EFT_SIZE; } @@ -4258,7 +4266,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; unsigned long flags; - uint16_t fw_major_version; int done_once = 0; if (IS_P3P_TYPE(ha)) { @@ -4325,7 +4332,6 @@ execute_fw_with_lr: goto failed; enable_82xx_npiv: - fw_major_version = ha->fw_major_version; if (IS_P3P_TYPE(ha)) qla82xx_check_md_needed(vha); else @@ -4354,12 +4360,11 @@ enable_82xx_npiv: if (rval != QLA_SUCCESS) goto failed; - if (!fw_major_version && !(IS_P3P_TYPE(ha))) - qla2x00_alloc_offload_mem(vha); - if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) qla2x00_alloc_fw_dump(vha); + qla_enable_fce_trace(vha); + qla_enable_eft_trace(vha); } else { goto failed; } @@ -7544,7 +7549,6 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) int qla2x00_abort_isp(scsi_qla_host_t *vha) { - int rval; uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *tvp; @@ -7638,31 +7642,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) qla2x00_get_fw_version(vha); - if (ha->fce) { - ha->flags.fce_enabled = 1; - memset(ha->fce, 0, - fce_calc_size(ha->fce_bufs)); - rval = qla2x00_enable_fce_trace(vha, - ha->fce_dma, ha->fce_bufs, ha->fce_mb, - &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x8033, - "Unable to reinitialize FCE " - "(%d).\n", rval); - ha->flags.fce_enabled = 0; - } - } - if (ha->eft) { - memset(ha->eft, 0, EFT_SIZE); - rval = qla2x00_enable_eft_trace(vha, - ha->eft_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x8034, - "Unable to reinitialize EFT " - "(%d).\n", rval); - } - } } else { /* failed the ISP abort */ vha->flags.online = 1; if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { -- GitLab From 101c1d2d46a1170caed0d41f1763ad6c167bb0c0 Mon Sep 17 00:00:00 2001 From: Bikash Hazarika Date: Tue, 27 Feb 2024 22:11:20 +0530 Subject: [PATCH 1441/2290] scsi: qla2xxx: Update manufacturer detail commit 688fa069fda6fce24d243cddfe0c7024428acb74 upstream. Update manufacturer detail from "Marvell Semiconductor, Inc." to "Marvell". Cc: stable@vger.kernel.org Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1713588f671f3..31c451daeeb82 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -83,7 +83,7 @@ typedef union { #include "qla_nvme.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" -#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc." +#define QLA2XXX_MANUFACTURER "Marvell" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, -- GitLab From 1bcbd100abb8a01ea79d3e5568259ae621017788 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:21 +0530 Subject: [PATCH 1442/2290] scsi: qla2xxx: NVME|FCP prefer flag not being honored commit 69aecdd410106dc3a8f543a4f7ec6379b995b8d0 upstream. Changing of [FCP|NVME] prefer flag in flash has no effect on driver. For device that supports both FCP + NVMe over the same connection, driver continues to connect to this device using the previous successful login mode. On completion of flash update, adapter will be reset. Driver will reset the prefer flag based on setting from flash. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f0a11d42e1fd4..c64e44964d840 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7554,6 +7554,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) struct scsi_qla_host *vp, *tvp; struct req_que *req = ha->req_q_map[0]; unsigned long flags; + fc_port_t *fcport; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -7622,6 +7623,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); return status; } + + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + if (!qla2x00_restart_isp(vha)) { clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); @@ -7692,6 +7702,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) atomic_inc(&vp->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vp->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + qla2x00_vp_abort_isp(vp); spin_lock_irqsave(&ha->vport_slock, flags); -- GitLab From 09c0ac18cac206ed1218b1fe6c1a0918e5ea9211 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:22 +0530 Subject: [PATCH 1443/2290] scsi: qla2xxx: Fix command flush on cable pull commit a27d4d0e7de305def8a5098a614053be208d1aa1 upstream. System crash due to command failed to flush back to SCSI layer. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 27 PID: 793455 Comm: kworker/u130:6 Kdump: loaded Tainted: G OE --------- - - 4.18.0-372.9.1.el8.x86_64 #1 Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 Workqueue: nvme-wq nvme_fc_connect_ctrl_work [nvme_fc] RIP: 0010:__wake_up_common+0x4c/0x190 Code: 24 10 4d 85 c9 74 0a 41 f6 01 04 0f 85 9d 00 00 00 48 8b 43 08 48 83 c3 08 4c 8d 48 e8 49 8d 41 18 48 39 c3 0f 84 f0 00 00 00 <49> 8b 41 18 89 54 24 08 31 ed 4c 8d 70 e8 45 8b 29 41 f6 c5 04 75 RSP: 0018:ffff95f3e0cb7cd0 EFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff8b08d3b26328 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8b08d3b26320 RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffffffffe8 R10: 0000000000000000 R11: ffff95f3e0cb7a60 R12: ffff95f3e0cb7d20 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8b2fdf6c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000002f1e410002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __wake_up_common_lock+0x7c/0xc0 qla_nvme_ls_req+0x355/0x4c0 [qla2xxx] qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae1407ca000 from port 21:32:00:02:ac:07:ee:b8 loop_id 0x02 s_id 01:02:00 logout 1 keep 0 els_logo 0 ? __nvme_fc_send_ls_req+0x260/0x380 [nvme_fc] qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:00:02:ac:07:ee:b8 state transitioned from ONLINE to LOST - portid=010200. ? nvme_fc_send_ls_req.constprop.42+0x1a/0x45 [nvme_fc] qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320002ac07eeb8. rport ffff8ae598122000 roles 1 ? nvme_fc_connect_ctrl_work.cold.63+0x1e3/0xa7d [nvme_fc] qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae14801e000 from port 21:32:01:02:ad:f7:ee:b8 loop_id 0x04 s_id 01:02:01 logout 1 keep 0 els_logo 0 ? __switch_to+0x10c/0x450 ? process_one_work+0x1a7/0x360 qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:01:02:ad:f7:ee:b8 state transitioned from ONLINE to LOST - portid=010201. ? worker_thread+0x1ce/0x390 ? create_worker+0x1a0/0x1a0 qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320102adf7eeb8. rport ffff8ae3b2312800 roles 70 ? kthread+0x10a/0x120 qla2xxx [0000:12:00.1]-2112:3: qla_nvme_unregister_remote_port: unregister remoteport on ffff8ae14801e000 21320102adf7eeb8 ? set_kthread_struct+0x40/0x40 qla2xxx [0000:12:00.1]-2110:3: remoteport_delete of ffff8ae14801e000 21320102adf7eeb8 completed. ? ret_from_fork+0x1f/0x40 qla2xxx [0000:12:00.1]-f086:3: qlt_free_session_done: waiting for sess ffff8ae14801e000 logout The system was under memory stress where driver was not able to allocate an SRB to carry out error recovery of cable pull. The failure to flush causes upper layer to start modifying scsi_cmnd. When the system frees up some memory, the subsequent cable pull trigger another command flush. At this point the driver access a null pointer when attempting to DMA unmap the SGL. Add a check to make sure commands are flush back on session tear down to prevent the null pointer access. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 5a5beb41786ed..043cfa10c7167 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1079,6 +1079,16 @@ void qlt_free_session_done(struct work_struct *work) "%s: sess %p logout completed\n", __func__, sess); } + /* check for any straggling io left behind */ + if (!(sess->flags & FCF_FCP2_DEVICE) && + qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x3027, + "IO not return. Resetting.\n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } + if (sess->logo_ack_needed) { sess->logo_ack_needed = 0; qla24xx_async_notify_ack(vha, sess, -- GitLab From 282877633b25d67021a34169c5b5519b1d4ef65e Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Tue, 27 Feb 2024 22:11:24 +0530 Subject: [PATCH 1444/2290] scsi: qla2xxx: Fix double free of fcport commit 82f522ae0d97119a43da53e0f729275691b9c525 upstream. The server was crashing after LOGO because fcport was getting freed twice. -----------[ cut here ]----------- kernel BUG at mm/slub.c:371! invalid opcode: 0000 1 SMP PTI CPU: 35 PID: 4610 Comm: bash Kdump: loaded Tainted: G OE --------- - - 4.18.0-425.3.1.el8.x86_64 #1 Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 RIP: 0010:set_freepointer.part.57+0x0/0x10 RSP: 0018:ffffb07107027d90 EFLAGS: 00010246 RAX: ffff9cb7e3150000 RBX: ffff9cb7e332b9c0 RCX: ffff9cb7e3150400 RDX: 0000000000001f37 RSI: 0000000000000000 RDI: ffff9cb7c0005500 RBP: fffff693448c5400 R08: 0000000080000000 R09: 0000000000000009 R10: 0000000000000000 R11: 0000000000132af0 R12: ffff9cb7c0005500 R13: ffff9cb7e3150000 R14: ffffffffc06990e0 R15: ffff9cb7ea85ea58 FS: 00007ff6b79c2740(0000) GS:ffff9cb8f7ec0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055b426b7d700 CR3: 0000000169c18002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: kfree+0x238/0x250 qla2x00_els_dcmd_sp_free+0x20/0x230 [qla2xxx] ? qla24xx_els_dcmd_iocb+0x607/0x690 [qla2xxx] qla2x00_issue_logo+0x28c/0x2a0 [qla2xxx] ? qla2x00_issue_logo+0x28c/0x2a0 [qla2xxx] ? kernfs_fop_write+0x11e/0x1a0 Remove one of the free calls and add check for valid fcport. Also use function qla2x00_free_fcport() instead of kfree(). Cc: stable@vger.kernel.org Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-9-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_iocb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 97b98ca09acb1..7bccd525ee19b 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2638,7 +2638,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) { struct srb_iocb *elsio = &sp->u.iocb_cmd; - kfree(sp->fcport); + if (sp->fcport) + qla2x00_free_fcport(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, @@ -2751,6 +2752,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2785,7 +2787,6 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b.area, fcport->d_id.b.al_pa); wait_for_completion(&elsio->u.els_logo.comp); - qla2x00_free_fcport(fcport); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); -- GitLab From 65f195232b371cf05f515db0a6916825645924bc Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Tue, 27 Feb 2024 22:11:25 +0530 Subject: [PATCH 1445/2290] scsi: qla2xxx: Change debug message during driver unload commit b5a30840727a3e41d12a336d19f6c0716b299161 upstream. Upon driver unload, purge_mbox flag is set and the heartbeat monitor thread detects this flag and does not send the mailbox command down to FW with a debug message "Error detected: purge[1] eeh[0] cmd=0x0, Exiting". This being not a real error, change the debug message. Cc: stable@vger.kernel.org Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-10-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_mbx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index f794f4363a38c..1fd9485985f2e 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || ha->flags.eeh_busy) { ql_log(ql_log_warn, vha, 0xd035, - "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; -- GitLab From f30b3ee9a4861bcaea2642b1ec38d06e39938ce1 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 27 Feb 2024 22:11:26 +0530 Subject: [PATCH 1446/2290] scsi: qla2xxx: Delay I/O Abort on PCI error commit 591c1fdf2016d118b8fbde427b796fac13f3f070 upstream. Currently when PCI error is detected, I/O is aborted manually through the ABORT IOCB mechanism which is not guaranteed to succeed. Instead, wait for the OS or system to notify driver to wind down I/O through the pci_error_handlers api. Set eeh_busy flag to pause all traffic and wait for I/O to drain. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240227164127.36465-11-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 07fbaa452d8a1..0d414c1aa84e7 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) return; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(fcport->vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } } @@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(vha); qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET); return; -- GitLab From 00f511d71629eb15715dce62b72117341891eff4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 20 Jul 2023 14:47:27 -0500 Subject: [PATCH 1447/2290] x86/cpu: Enable STIBP on AMD if Automatic IBRS is enabled commit fd470a8beed88440b160d690344fbae05a0b9b1b upstream. Unlike Intel's Enhanced IBRS feature, AMD's Automatic IBRS does not provide protection to processes running at CPL3/user mode, see section "Extended Feature Enable Register (EFER)" in the APM v2 at https://bugzilla.kernel.org/attachment.cgi?id=304652 Explicitly enable STIBP to protect against cross-thread CPL3 branch target injections on systems with Automatic IBRS enabled. Also update the relevant documentation. Fixes: e7862eda309e ("x86/cpu: Support AMD Automatic IBRS") Reported-by: Tom Lendacky Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230720194727.67022-1-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 11 +++++++---- arch/x86/kernel/cpu/bugs.c | 15 +++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 4d186f599d90f..32a8893e56177 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -484,11 +484,14 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks, including cross-thread branch target injections - on SMT systems (STIBP). In other words, eIBRS enables STIBP too. + Spectre v2 variant attacks. - Legacy IBRS systems clear the IBRS bit on exit to userspace and - therefore explicitly enable STIBP for that + On Intel's enhanced IBRS systems, this includes cross-thread branch target + injections on SMT systems (STIBP). In other words, Intel eIBRS enables + STIBP, too. + + AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear + the IBRS bit on exit to userspace, therefore both explicitly enable STIBP. The retpoline mitigation is turned on by default on vulnerable CPUs. It can be forced on or off by the administrator diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 750fb4fc2ac6a..e3fec47a800bf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1354,19 +1354,21 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP * is not required. * - * Enhanced IBRS also protects against cross-thread branch target + * Intel's Enhanced IBRS also protects against cross-thread branch target * injection in user-mode as the IBRS bit remains always set which * implicitly enables cross-thread protections. However, in legacy IBRS * mode, the IBRS bit is set only on kernel entry and cleared on return - * to userspace. This disables the implicit cross-thread protection, - * so allow for STIBP to be selected in that case. + * to userspace. AMD Automatic IBRS also does not protect userspace. + * These modes therefore disable the implicit cross-thread protection, + * so allow for STIBP to be selected in those cases. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) return; /* @@ -2666,7 +2668,8 @@ static ssize_t rfds_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS)) return ""; switch (spectre_v2_user_stibp) { -- GitLab From f2b85a4cc763841843de693bbd7308fe9a2c4c89 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 28 Feb 2024 23:44:00 +0100 Subject: [PATCH 1448/2290] tls: fix use-after-free on failed backlog decryption commit 13114dc5543069f7b97991e3b79937b6da05f5b0 upstream. When the decrypt request goes to the backlog and crypto_aead_decrypt returns -EBUSY, tls_do_decryption will wait until all async decryptions have completed. If one of them fails, tls_do_decryption will return -EBADMSG and tls_decrypt_sg jumps to the error path, releasing all the pages. But the pages have been passed to the async callback, and have already been released by tls_decrypt_done. The only true async case is when crypto_aead_decrypt returns -EINPROGRESS. With -EBUSY, we already waited so we can tell tls_sw_recvmsg that the data is available for immediate copy, but we need to notify tls_decrypt_sg (via the new ->async_done flag) that the memory has already been released. Fixes: 859054147318 ("net: tls: handle backlogging of crypto requests") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/4755dd8d9bebdefaa19ce1439b833d6199d4364c.1709132643.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e723584fc644b..bdb5153f3788a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -51,6 +51,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ -279,18 +280,19 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS) + return 0; + if (ret == -EBUSY) { ret = tls_decrypt_async_wait(ctx); + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; ret = ret ?: -EINPROGRESS; } - if (ret == -EINPROGRESS) { - if (darg->async) - return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); - } else if (darg->async) { - atomic_dec(&ctx->decrypt_pending); - } + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -1681,8 +1683,11 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; @@ -1694,6 +1699,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; -- GitLab From d8cd93e0304709beed2a78774640d25d38a47d21 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 4 Mar 2024 14:11:19 +0500 Subject: [PATCH 1449/2290] scsi: lpfc: Correct size for cmdwqe/rspwqe for memset() commit 16cc2ba71b9f6440805aef7f92ba0f031f79b765 upstream. The cmdwqe and rspwqe are of type lpfc_wqe128. They should be memset() with the same type. Fixes: 61910d6a5243 ("scsi: lpfc: SLI path split: Refactor CT paths") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240304091119.847060-1-usama.anjum@collabora.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_bsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index b54fafb486e06..2373dad016033 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) } cmdwqe = &cmdiocbq->wqe; - memset(cmdwqe, 0, sizeof(union lpfc_wqe)); + memset(cmdwqe, 0, sizeof(*cmdwqe)); if (phba->sli_rev < LPFC_SLI_REV4) { rspwqe = &rspiocbq->wqe; - memset(rspwqe, 0, sizeof(union lpfc_wqe)); + memset(rspwqe, 0, sizeof(*rspwqe)); } INIT_LIST_HEAD(&head); -- GitLab From 8dbc1762202aa5db49d23346316fe0f140063946 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 4 Mar 2024 14:06:48 +0500 Subject: [PATCH 1450/2290] scsi: lpfc: Correct size for wqe for memset() commit 28d41991182c210ec1654f8af2e140ef4cc73f20 upstream. The wqe is of type lpfc_wqe128. It should be memset with the same type. Fixes: 6c621a2229b0 ("scsi: lpfc: Separate NVMET RQ buffer posting from IO resources SGL/iocbq/context") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240304090649.833953-1-usama.anjum@collabora.com Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index f7cfac0da9b6e..1c64da3b2e9f0 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) wqe = &nvmewqe->wqe; /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); + memset(wqe, 0, sizeof(*wqe)); ctx_buf->iocbq->cmd_dmabuf = NULL; spin_lock(&phba->sli4_hba.sgl_list_lock); -- GitLab From d511040d816941d2b227837e01d9b9b1cc416c03 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Thu, 7 Mar 2024 14:14:12 +0000 Subject: [PATCH 1451/2290] scsi: libsas: Add a helper sas_get_sas_addr_and_dev_type() commit a57345279fd311ba679b8083feb0eec5272c7729 upstream. Add a helper to get attached_sas_addr and device type from disc_resp. Suggested-by: John Garry Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240307141413.48049-2-yangxingui@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libsas/sas_expander.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 63a23251fb1d8..1b8005c07dff4 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1651,6 +1651,16 @@ out_err: /* ---------- Domain revalidation ---------- */ +static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp, + u8 *sas_addr, + enum sas_device_type *type) +{ + memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE); + *type = to_dev_type(&disc_resp->disc); + if (*type == SAS_PHY_UNUSED) + memset(sas_addr, 0, SAS_ADDR_SIZE); +} + static int sas_get_phy_discover(struct domain_device *dev, int phy_id, struct smp_disc_resp *disc_resp) { @@ -1704,13 +1714,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, return -ENOMEM; res = sas_get_phy_discover(dev, phy_id, disc_resp); - if (res == 0) { - memcpy(sas_addr, disc_resp->disc.attached_sas_addr, - SAS_ADDR_SIZE); - *type = to_dev_type(&disc_resp->disc); - if (*type == 0) - memset(sas_addr, 0, SAS_ADDR_SIZE); - } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type); kfree(disc_resp); return res; } -- GitLab From 98cfafaf2f414e03302c09e4989f3c35744db5ce Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Thu, 7 Mar 2024 14:14:13 +0000 Subject: [PATCH 1452/2290] scsi: libsas: Fix disk not being scanned in after being removed commit 8e68a458bcf5b5cb9c3624598bae28f08251601f upstream. As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info"), do discovery will send a new SMP_DISCOVER and update phy->phy_change_count. We found that if the disk is reconnected and phy change_count changes at this time, the disk scanning process will not be triggered. Therefore, call sas_set_ex_phy() to update the PHY info with the results of the last query. And because the previous phy info will be used when calling sas_unregister_devs_sas_addr(), sas_unregister_devs_sas_addr() should be called before sas_set_ex_phy(). Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info") Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240307141413.48049-3-yangxingui@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libsas/sas_expander.c | 32 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 1b8005c07dff4..4b5ceba68e46e 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1977,6 +1977,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; + struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; char msg[80] = ""; int res; @@ -1988,33 +1989,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(dev->sas_addr), phy_id, msg); memset(sas_addr, 0, SAS_ADDR_SIZE); - res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_FUNC_ACC: break; case -ECOMM: break; default: - return res; + goto out_free_resp; } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type); + if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); /* - * Even though the PHY is empty, for convenience we discover - * the PHY to update the PHY info, like negotiated linkrate. + * Even though the PHY is empty, for convenience we update + * the PHY info, like negotiated linkrate. */ - sas_ex_phy_discover(dev, phy_id); - return res; + if (res == 0) + sas_set_ex_phy(dev, phy_id, disc_resp); + goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); @@ -2026,7 +2035,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, action = ", needs recovery"; pr_debug("ex %016llx phy%02d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - return res; + goto out_free_resp; } /* we always have to delete the old device when we went here */ @@ -2035,7 +2044,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); - return sas_discover_new(dev, phy_id); + res = sas_discover_new(dev, phy_id); +out_free_resp: + kfree(disc_resp); + return res; } /** -- GitLab From df84d9f7796feeb8dea0a4721e269da371982b76 Mon Sep 17 00:00:00 2001 From: Kevin Loughlin Date: Wed, 13 Mar 2024 12:15:46 +0000 Subject: [PATCH 1453/2290] x86/sev: Skip ROM range scans and validation for SEV-SNP guests commit 0f4a1e80989aca185d955fcd791d7750082044a2 upstream. SEV-SNP requires encrypted memory to be validated before access. Because the ROM memory range is not part of the e820 table, it is not pre-validated by the BIOS. Therefore, if a SEV-SNP guest kernel wishes to access this range, the guest must first validate the range. The current SEV-SNP code does indeed scan the ROM range during early boot and thus attempts to validate the ROM range in probe_roms(). However, this behavior is neither sufficient nor necessary for the following reasons: * With regards to sufficiency, if EFI_CONFIG_TABLES are not enabled and CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK is set, the kernel will attempt to access the memory at SMBIOS_ENTRY_POINT_SCAN_START (which falls in the ROM range) prior to validation. For example, Project Oak Stage 0 provides a minimal guest firmware that currently meets these configuration conditions, meaning guests booting atop Oak Stage 0 firmware encounter a problematic call chain during dmi_setup() -> dmi_scan_machine() that results in a crash during boot if SEV-SNP is enabled. * With regards to necessity, SEV-SNP guests generally read garbage (which changes across boots) from the ROM range, meaning these scans are unnecessary. The guest reads garbage because the legacy ROM range is unencrypted data but is accessed via an encrypted PMD during early boot (where the PMD is marked as encrypted due to potentially mapping actually-encrypted data in other PMD-contained ranges). In one exceptional case, EISA probing treats the ROM range as unencrypted data, which is inconsistent with other probing. Continuing to allow SEV-SNP guests to use garbage and to inconsistently classify ROM range encryption status can trigger undesirable behavior. For instance, if garbage bytes appear to be a valid signature, memory may be unnecessarily reserved for the ROM range. Future code or other use cases may result in more problematic (arbitrary) behavior that should be avoided. While one solution would be to overhaul the early PMD mapping to always treat the ROM region of the PMD as unencrypted, SEV-SNP guests do not currently rely on data from the ROM region during early boot (and even if they did, they would be mostly relying on garbage data anyways). As a simpler solution, skip the ROM range scans (and the otherwise- necessary range validation) during SEV-SNP guest early boot. The potential SEV-SNP guest crash due to lack of ROM range validation is thus avoided by simply not accessing the ROM range. In most cases, skip the scans by overriding problematic x86_init functions during sme_early_init() to SNP-safe variants, which can be likened to x86_init overrides done for other platforms (ex: Xen); such overrides also avoid the spread of cc_platform_has() checks throughout the tree. In the exceptional EISA case, still use cc_platform_has() for the simplest change, given (1) checks for guest type (ex: Xen domain status) are already performed here, and (2) these checks occur in a subsys initcall instead of an x86_init function. [ bp: Massage commit message, remove "we"s. ] Fixes: 9704c07bf9f7 ("x86/kernel: Validate ROM memory before accessing when SEV-SNP is active") Signed-off-by: Kevin Loughlin Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20240313121546.2964854-1-kevinloughlin@google.com Signed-off-by: Kevin Loughlin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/sev.h | 4 ++-- arch/x86/include/asm/x86_init.h | 3 ++- arch/x86/kernel/eisa.c | 3 ++- arch/x86/kernel/probe_roms.c | 10 ---------- arch/x86/kernel/setup.c | 3 +-- arch/x86/kernel/sev.c | 27 ++++++++++++--------------- arch/x86/kernel/x86_init.c | 2 ++ arch/x86/mm/mem_encrypt_amd.c | 18 ++++++++++++++++++ 8 files changed, 39 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index cf98fc28601fb..c57dd21155bd7 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -196,12 +196,12 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd unsigned long npages); void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); @@ -219,12 +219,12 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 034e62838b284..c3e910b1d5a25 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b04490..53935b4d62e30 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include #include #include #include @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dce..cc2c34ba7228a 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 804a252382da7..d1ffac9ad611d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -1032,7 +1031,7 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index f93ff4794e38f..e35fcc8d4bae4 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -768,21 +769,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static int vmgexit_psc(struct snp_psc_desc *desc) { int cur_entry, end_entry, ret = 0; @@ -2152,6 +2138,17 @@ void __init __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 41e5b4cb898c3..a4a921b9e6646 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include #include #include #include @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 3e93af083e037..d4957eefef267 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -513,6 +513,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. + */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) -- GitLab From ab062fa3dc69aea88fe62162c5881ba14b50ecc5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 12 Mar 2024 11:48:23 -0400 Subject: [PATCH 1454/2290] USB: core: Fix deadlock in usb_deauthorize_interface() commit 80ba43e9f799cbdd83842fc27db667289b3150f5 upstream. Among the attribute file callback routines in drivers/usb/core/sysfs.c, the interface_authorized_store() function is the only one which acquires a device lock on an ancestor device: It calls usb_deauthorize_interface(), which locks the interface's parent USB device. The will lead to deadlock if another process already owns that lock and tries to remove the interface, whether through a configuration change or because the device has been disconnected. As part of the removal procedure, device_del() waits for all ongoing sysfs attribute callbacks to complete. But usb_deauthorize_interface() can't complete until the device lock has been released, and the lock won't be released until the removal has finished. The mechanism provided by sysfs to prevent this kind of deadlock is to use the sysfs_break_active_protection() function, which tells sysfs not to wait for the attribute callback. Reported-and-tested by: Yue Sun Reported by: xingwei lee Signed-off-by: Alan Stern Link: https://lore.kernel.org/linux-usb/CAEkJfYO6jRVC8Tfrd_R=cjO0hguhrV31fDPrLrNOOHocDkPoAA@mail.gmail.com/#r Fixes: 310d2b4124c0 ("usb: interface authorization: SysFS part of USB interface authorization") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1c37eea1-9f56-4534-b9d8-b443438dc869@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/sysfs.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index ccf6cd9722693..5f1e07341f363 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1170,14 +1170,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (strtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } -- GitLab From 7b970a145c90260dee678065a9da97c90863a0ef Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Thu, 28 Mar 2024 11:59:13 +0100 Subject: [PATCH 1455/2290] tools/resolve_btfids: fix build with musl libc commit 62248b22d01e96a4d669cde0d7005bd51ebf9e76 upstream. Include the header that defines u32. This fixes build of 6.6.23 and 6.1.83 kernels for Alpine Linux, which uses musl libc. I assume that GNU libc indirecly pulls in linux/types.h. Fixes: 9707ac4fe2f5 ("tools/resolve_btfids: Refactor set sorting with types from btf_ids.h") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218647 Cc: stable@vger.kernel.org Signed-off-by: Natanael Copa Tested-by: Greg Thelen Link: https://lore.kernel.org/r/20240328110103.28734-1-ncopa@alpinelinux.org Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/btf_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 72535f00572f6..72ea363d434db 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; -- GitLab From 347385861c50adc8d4801d4b899eded38a2f04cd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Apr 2024 15:19:55 +0200 Subject: [PATCH 1456/2290] Linux 6.1.84 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240401152530.237785232@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Kelsey Steele Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Mark Brown Tested-by: Mateusz Jończyk Tested-by: Jon Hunter Tested-by: Salvatore Bonaccorso Tested-by: Yann Sionneau Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 38657b3dda2cd..0e33150db2bfc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 83 +SUBLEVEL = 84 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 8333aae9bba1f007b1b3bfdcbc7262ff79d10227 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Mar 2024 10:34:43 +0800 Subject: [PATCH 1457/2290] scripts/bpf_doc: Use silent mode when exec make cmd [ Upstream commit 5384cc0d1a88c27448a6a4e65b8abe6486de8012 ] When getting kernel version via make, the result may be polluted by other output, like directory change info. e.g. $ export MAKEFLAGS="-w" $ make kernelversion make: Entering directory '/home/net' 6.8.0 make: Leaving directory '/home/net' This will distort the reStructuredText output and make latter rst2man failed like: [...] bpf-helpers.rst:20: (WARNING/2) Field list ends without a blank line; unexpected unindent. [...] Using silent mode would help. e.g. $ make -s --no-print-directory kernelversion 6.8.0 Fixes: fd0a38f9c37d ("scripts/bpf: Set version attribute for bpf-helpers(7) man page") Signed-off-by: Michael Hofmann Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Alejandro Colomar Link: https://lore.kernel.org/bpf/20240315023443.2364442-1-liuhangbin@gmail.com Signed-off-by: Sasha Levin --- scripts/bpf_doc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 4de98b7bbea95..c2da6ed32104f 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -396,8 +396,8 @@ class PrinterRST(Printer): version = version.stdout.decode().rstrip() except: try: - version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, - capture_output=True, check=True) + version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], + cwd=linuxRoot, capture_output=True, check=True) version = version.stdout.decode().rstrip() except: return 'Linux' -- GitLab From 0336995512cdab0c65e99e4cdd47c4606debe14e Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 20 Mar 2024 04:15:23 +0500 Subject: [PATCH 1458/2290] dma-buf: Fix NULL pointer dereference in sanitycheck() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2295bd846765c766701e666ed2e4b35396be25e6 ] If due to a memory allocation failure mock_chain() returns NULL, it is passed to dma_fence_enable_sw_signaling() resulting in NULL pointer dereference there. Call dma_fence_enable_sw_signaling() only if mock_chain() succeeds. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d62c43a953ce ("dma-buf: Enable signaling on fence for selftests") Signed-off-by: Pavel Sakharov Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240319231527.1821372-1-p.sakharov@ispras.ru Signed-off-by: Sasha Levin --- drivers/dma-buf/st-dma-fence-chain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 0a9b099d05187..d90479d830fc3 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); -- GitLab From b51ec7fc9f877ef869c01d3ea6f18f6a64e831a7 Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 20 Mar 2024 09:54:10 +0900 Subject: [PATCH 1459/2290] nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet [ Upstream commit d24b03535e5eb82e025219c2f632b485409c898f ] syzbot reported the following uninit-value access issue [1][2]: nci_rx_work() parses and processes received packet. When the payload length is zero, each message type handler reads uninitialized payload and KMSAN detects this issue. The receipt of a packet with a zero-size payload is considered unexpected, and therefore, such packets should be silently discarded. This patch resolved this issue by checking payload size before calling each message type handler codes. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Reported-and-tested-by: syzbot+7ea9413ea6749baf5574@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+29b5ca705d2e0f4a44d2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7ea9413ea6749baf5574 [1] Closes: https://syzkaller.appspot.com/bug?extid=29b5ca705d2e0f4a44d2 [2] Signed-off-by: Ryosuke Yasuoka Reviewed-by: Jeremy Cline Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/nfc/nci/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index b5071a2f597d4..f76a2d8060340 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1512,6 +1512,11 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + if (!nci_plen(skb->data)) { + kfree_skb(skb); + break; + } + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: -- GitLab From 57beec623ac5314c5e0e957624c7517d9efdfa59 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 20 Mar 2024 15:31:17 -0400 Subject: [PATCH 1460/2290] mlxbf_gige: stop PHY during open() error paths [ Upstream commit d6c30c5a168f8586b8bcc0d8e42e2456eb05209b ] The mlxbf_gige_open() routine starts the PHY as part of normal initialization. The mlxbf_gige_open() routine must stop the PHY during its error paths. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Reviewed-by: Andrew Lunn Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 83c4659390fd5..113e3d9d33530 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -157,7 +157,7 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_tx_init(priv); if (err) - goto free_irqs; + goto phy_deinit; err = mlxbf_gige_rx_init(priv); if (err) goto tx_deinit; @@ -185,6 +185,9 @@ static int mlxbf_gige_open(struct net_device *netdev) tx_deinit: mlxbf_gige_tx_deinit(priv); +phy_deinit: + phy_stop(phydev); + free_irqs: mlxbf_gige_free_irqs(priv); return err; -- GitLab From 99a75d75007421d8e08ba139e24f77395cd08f62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2024 10:10:17 +0200 Subject: [PATCH 1461/2290] wifi: iwlwifi: mvm: rfi: fix potential response leaks [ Upstream commit 06a093807eb7b5c5b29b6cff49f8174a4e702341 ] If the rx payload length check fails, or if kmemdup() fails, we still need to free the command response. Fix that. Fixes: 21254908cbe9 ("iwlwifi: mvm: add RFI-M support") Co-authored-by: Anjaneyulu Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240319100755.db2fa0196aa7.I116293b132502ac68a65527330fa37799694b79c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rfi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c index bb77bc9aa8218..fb2408c0551d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c @@ -122,14 +122,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) if (ret) return ERR_PTR(ret); - if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) + if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != + resp_size)) { + iwl_free_resp(&cmd); return ERR_PTR(-EIO); + } resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); + iwl_free_resp(&cmd); + if (!resp) return ERR_PTR(-ENOMEM); - iwl_free_resp(&cmd); return resp; } -- GitLab From 231b189fa1a14a8ee8c2c14a81834cea4019fb23 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Tue, 5 Mar 2024 17:02:02 +0100 Subject: [PATCH 1462/2290] ixgbe: avoid sleeping allocation in ixgbe_ipsec_vf_add_sa() [ Upstream commit aec806fb4afba5fe80b09e29351379a4292baa43 ] Change kzalloc() flags used in ixgbe_ipsec_vf_add_sa() to GFP_ATOMIC, to avoid sleeping in IRQ context. Dan Carpenter, with the help of Smatch, has found following issue: The patch eda0333ac293: "ixgbe: add VF IPsec management" from Aug 13, 2018 (linux-next), leads to the following Smatch static checker warning: drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c:917 ixgbe_ipsec_vf_add_sa() warn: sleeping in IRQ context The call tree that Smatch is worried about is: ixgbe_msix_other() <- IRQ handler -> ixgbe_msg_task() -> ixgbe_rcv_msg_from_vf() -> ixgbe_ipsec_vf_add_sa() Fixes: eda0333ac293 ("ixgbe: add VF IPsec management") Reported-by: Dan Carpenter Link: https://lore.kernel.org/intel-wired-lan/db31a0b0-4d9f-4e6b-aed8-88266eb5665c@moroto.mountain Reviewed-by: Michal Kubiak Signed-off-by: Przemek Kitszel Reviewed-by: Shannon Nelson Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 774de63dd93a6..15fc2acffb871 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -908,7 +908,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) goto err_out; } - xs = kzalloc(sizeof(*xs), GFP_KERNEL); + algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); + if (unlikely(!algo)) { + err = -ENOENT; + goto err_out; + } + + xs = kzalloc(sizeof(*xs), GFP_ATOMIC); if (unlikely(!xs)) { err = -ENOMEM; goto err_out; @@ -924,14 +930,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); xs->xso.dev = adapter->netdev; - algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); - if (unlikely(!algo)) { - err = -ENOENT; - goto err_xs; - } - aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; - xs->aead = kzalloc(aead_len, GFP_KERNEL); + xs->aead = kzalloc(aead_len, GFP_ATOMIC); if (unlikely(!xs->aead)) { err = -ENOMEM; goto err_xs; -- GitLab From 16307e7bc1120e87f243d00b8e403da9cfaa60d6 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 21 Mar 2024 12:53:37 +0100 Subject: [PATCH 1463/2290] s390/qeth: handle deferred cc1 [ Upstream commit afb373ff3f54c9d909efc7f810dc80a9742807b2 ] The IO subsystem expects a driver to retry a ccw_device_start, when the subsequent interrupt response block (irb) contains a deferred condition code 1. Symptoms before this commit: On the read channel we always trigger the next read anyhow, so no different behaviour here. On the write channel we may experience timeout errors, because the expected reply will never be received without the retry. Other callers of qeth_send_control_data() may wrongly assume that the ccw was successful, which may cause problems later. Note that since commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") and commit 5ef1dc40ffa6 ("s390/cio: fix invalid -EBUSY on ccw_device_start") deferred CC1s are much more likely to occur. See the commit message of the latter for more background information. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Signed-off-by: Alexandra Winter Co-developed-by: Thorsten Winkler Signed-off-by: Thorsten Winkler Reviewed-by: Peter Oberparleiter Link: https://lore.kernel.org/r/20240321115337.3564694-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 38 +++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index ae4b6d24bc902..1e6340e2c2588 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, } } +/** + * qeth_irq() - qeth interrupt handler + * @cdev: ccw device + * @intparm: expect pointer to iob + * @irb: Interruption Response Block + * + * In the good path: + * corresponding qeth channel is locked with last used iob as active_cmd. + * But this function is also called for error interrupts. + * + * Caller ensures that: + * Interrupts are disabled; ccw device lock is held; + * + */ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { @@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, iob = (struct qeth_cmd_buffer *) (addr_t)intparm; } - qeth_unlock_channel(card, channel); - rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); return; @@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); @@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } } + if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { + /* channel command hasn't started: retry. + * active_cmd is still set to last iob + */ + QETH_CARD_TEXT(card, 2, "irqcc1"); + rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), + (addr_t)iob, 0, 0, iob->timeout); + if (rc) { + QETH_DBF_MESSAGE(2, + "ccw retry on %x failed, rc = %i\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, " err%d", rc); + qeth_unlock_channel(card, channel); + qeth_cancel_cmd(iob, rc); + } + return; + } + + qeth_unlock_channel(card, channel); + if (iob) { /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { -- GitLab From 91b243de910a9ac8476d40238ab3dbfeedd5b7de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2024 13:57:32 +0000 Subject: [PATCH 1464/2290] tcp: properly terminate timers for kernel sockets [ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ] We had various syzbot reports about tcp timers firing after the corresponding netns has been dismantled. Fortunately Josef Bacik could trigger the issue more often, and could test a patch I wrote two years ago. When TCP sockets are closed, we call inet_csk_clear_xmit_timers() to 'stop' the timers. inet_csk_clear_xmit_timers() can be called from any context, including when socket lock is held. This is the reason it uses sk_stop_timer(), aka del_timer(). This means that ongoing timers might finish much later. For user sockets, this is fine because each running timer holds a reference on the socket, and the user socket holds a reference on the netns. For kernel sockets, we risk that the netns is freed before timer can complete, because kernel sockets do not hold reference on the netns. This patch adds inet_csk_clear_xmit_timers_sync() function that using sk_stop_timer_sync() to make sure all timers are terminated before the kernel socket is released. Modules using kernel sockets close them in their netns exit() handler. Also add sock_not_owned_by_me() helper to get LOCKDEP support : inet_csk_clear_xmit_timers_sync() must not be called while socket lock is held. It is very possible we can revert in the future commit 3a58f13a881e ("net: rds: acquire refcount on TCP sockets") which attempted to solve the issue in rds only. (net/smc/af_smc.c and net/mptcp/subflow.c have similar code) We probably can remove the check_net() tests from tcp_out_of_resources() and __tcp_close() in the future. Reported-by: Josef Bacik Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/ Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/ Signed-off-by: Eric Dumazet Tested-by: Josef Bacik Cc: Tetsuo Handa Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/inet_connection_sock.h | 1 + include/net/sock.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ net/ipv4/tcp.c | 2 ++ 4 files changed, 24 insertions(+) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 080968d6e6c53..8132f330306db 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -172,6 +172,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/sock.h b/include/net/sock.h index 579732d47dfc4..60577751ea9e8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1833,6 +1833,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 79fa19a36bbd1..f7832d4253820 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -771,6 +771,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. */ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5a165e29f7be4..f01c0a5d2c37b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3052,6 +3052,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); -- GitLab From beaf0e7996b79e06ccc2bdcb4442fbaeccc31200 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 22 Mar 2024 15:40:00 +0100 Subject: [PATCH 1465/2290] net: wwan: t7xx: Split 64bit accesses to fix alignment issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d5a7dd5a35876f0ecc286f3602a88887a788217 ] Some of the registers are aligned on a 32bit boundary, causing alignment faults on 64bit platforms. Unable to handle kernel paging request at virtual address ffffffc084a1d004 Mem abort info: ESR = 0x0000000096000061 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x21: alignment fault Data abort info: ISV = 0, ISS = 0x00000061, ISS2 = 0x00000000 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000046ad6000 [ffffffc084a1d004] pgd=100000013ffff003, p4d=100000013ffff003, pud=100000013ffff003, pmd=0068000020a00711 Internal error: Oops: 0000000096000061 [#1] SMP Modules linked in: mtk_t7xx(+) qcserial pppoe ppp_async option nft_fib_inet nf_flow_table_inet mt7921u(O) mt7921s(O) mt7921e(O) mt7921_common(O) iwlmvm(O) iwldvm(O) usb_wwan rndis_host qmi_wwan pppox ppp_generic nft_reject_ipv6 nft_reject_ipv4 nft_reject_inet nft_reject nft_redir nft_quota nft_numgen nft_nat nft_masq nft_log nft_limit nft_hash nft_flow_offload nft_fib_ipv6 nft_fib_ipv4 nft_fib nft_ct nft_chain_nat nf_tables nf_nat nf_flow_table nf_conntrack mt7996e(O) mt792x_usb(O) mt792x_lib(O) mt7915e(O) mt76_usb(O) mt76_sdio(O) mt76_connac_lib(O) mt76(O) mac80211(O) iwlwifi(O) huawei_cdc_ncm cfg80211(O) cdc_ncm cdc_ether wwan usbserial usbnet slhc sfp rtc_pcf8563 nfnetlink nf_reject_ipv6 nf_reject_ipv4 nf_log_syslog nf_defrag_ipv6 nf_defrag_ipv4 mt6577_auxadc mdio_i2c libcrc32c compat(O) cdc_wdm cdc_acm at24 crypto_safexcel pwm_fan i2c_gpio i2c_smbus industrialio i2c_algo_bit i2c_mux_reg i2c_mux_pca954x i2c_mux_pca9541 i2c_mux_gpio i2c_mux dummy oid_registry tun sha512_arm64 sha1_ce sha1_generic seqiv md5 geniv des_generic libdes cbc authencesn authenc leds_gpio xhci_plat_hcd xhci_pci xhci_mtk_hcd xhci_hcd nvme nvme_core gpio_button_hotplug(O) dm_mirror dm_region_hash dm_log dm_crypt dm_mod dax usbcore usb_common ptp aquantia pps_core mii tpm encrypted_keys trusted CPU: 3 PID: 5266 Comm: kworker/u9:1 Tainted: G O 6.6.22 #0 Hardware name: Bananapi BPI-R4 (DT) Workqueue: md_hk_wq t7xx_fsm_uninit [mtk_t7xx] pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : t7xx_cldma_hw_set_start_addr+0x1c/0x3c [mtk_t7xx] lr : t7xx_cldma_start+0xac/0x13c [mtk_t7xx] sp : ffffffc085d63d30 x29: ffffffc085d63d30 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: ffffff80c804f2c0 x24: ffffff80ca196c05 x23: 0000000000000000 x22: ffffff80c814b9b8 x21: ffffff80c814b128 x20: 0000000000000001 x19: ffffff80c814b080 x18: 0000000000000014 x17: 0000000055c9806b x16: 000000007c5296d0 x15: 000000000f6bca68 x14: 00000000dbdbdce4 x13: 000000001aeaf72a x12: 0000000000000001 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffffff80ca1ef6b4 x7 : ffffff80c814b818 x6 : 0000000000000018 x5 : 0000000000000870 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 000000010a947000 x1 : ffffffc084a1d004 x0 : ffffffc084a1d004 Call trace: t7xx_cldma_hw_set_start_addr+0x1c/0x3c [mtk_t7xx] t7xx_fsm_uninit+0x578/0x5ec [mtk_t7xx] process_one_work+0x154/0x2a0 worker_thread+0x2ac/0x488 kthread+0xe0/0xec ret_from_fork+0x10/0x20 Code: f9400800 91001000 8b214001 d50332bf (f9000022) ---[ end trace 0000000000000000 ]--- The inclusion of io-64-nonatomic-lo-hi.h indicates that all 64bit accesses can be replaced by pairs of nonatomic 32bit access. Fix alignment by forcing all accesses to be 32bit on 64bit platforms. Link: https://forum.openwrt.org/t/fibocom-fm350-gl-support/142682/72 Fixes: 39d439047f1d ("net: wwan: t7xx: Add control DMA interface") Signed-off-by: Bjørn Mork Reviewed-by: Sergey Ryazanov Tested-by: Liviu Dudau Link: https://lore.kernel.org/r/20240322144000.1683822-1-bjorn@mork.no Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wwan/t7xx/t7xx_cldma.c | 4 ++-- drivers/net/wwan/t7xx/t7xx_hif_cldma.c | 9 +++++---- drivers/net/wwan/t7xx/t7xx_pcie_mac.c | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c index 9f43f256db1d0..f0a4783baf1f3 100644 --- a/drivers/net/wwan/t7xx/t7xx_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_cldma.c @@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) { u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; - return ioread64(hw_info->ap_pdn_base + offset); + return ioread64_lo_hi(hw_info->ap_pdn_base + offset); } void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, @@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; - iowrite64(address, reg + offset); + iowrite64_lo_hi(address, reg + offset); } void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index 6ff30cb8eb16f..5d6032ceb9e51 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool return -ENODEV; } - gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + + REG_CLDMA_DL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) return 0; @@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; /* Check current processing TGPD, 64-bit address is in a table by Q index */ - ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr != ul_curr_addr) { spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c index 76da4c15e3de1..f071ec7ff23d5 100644 --- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c @@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; - iowrite64(0, reg); + iowrite64_lo_hi(0, reg); } } @@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; iowrite32(cfg->trsl_id, reg); reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); /* Ensure ATR is set */ - ioread64(reg); + ioread64_lo_hi(reg); return 0; } -- GitLab From ebabdae52f2a0ec378baf2c6e3a5f22725ee0b4f Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Fri, 22 Mar 2024 21:07:53 +0300 Subject: [PATCH 1466/2290] ACPICA: debugger: check status of acpi_evaluate_object() in acpi_db_walk_for_fields() [ Upstream commit 40e2710860e57411ab57a1529c5a2748abbe8a19 ] ACPICA commit 9061cd9aa131205657c811a52a9f8325a040c6c9 Errors in acpi_evaluate_object() can lead to incorrect state of buffer. This can lead to access to data in previously ACPI_FREEd buffer and secondary ACPI_FREE to the same buffer later. Handle errors in acpi_evaluate_object the same way it is done earlier with acpi_ns_handle_to_pathname. Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://github.com/acpica/acpica/commit/9061cd9a Fixes: 5fd033288a86 ("ACPICA: debugger: add command to dump all fields of particular subtype") Signed-off-by: Nikita Kiryushin Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpica/dbnames.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index b91155ea9c343..c9131259f717b 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, ACPI_FREE(buffer.pointer); buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); - + status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_os_printf("Could Not evaluate object %p\n", + obj_handle); + return (AE_OK); + } /* * Since this is a field unit, surround the output in braces */ -- GitLab From b8b533eeee5ba53950d7e1358c7fa1614013cdf6 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 25 Mar 2024 20:43:09 +0800 Subject: [PATCH 1467/2290] net: hns3: fix index limit to support all queue stats [ Upstream commit 47e39d213e09c6cae0d6b4d95e454ea404013312 ] Currently, hns hardware supports more than 512 queues and the index limit in hclge_comm_tqps_update_stats is wrong. So this patch removes it. Fixes: 287db5c40d15 ("net: hns3: create new set of common tqp stats APIs for PF and VF reuse") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Reviewed-by: Michal Kubiak Reviewed-by: Kalesh AP Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index f3c9395d8351c..618f66d9586b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, true); - desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); + desc.data[0] = cpu_to_le32(tqp->index); ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, -- GitLab From 50b69054f455dcdb34bd6b22764c7579b270eef3 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 25 Mar 2024 20:43:10 +0800 Subject: [PATCH 1468/2290] net: hns3: fix kernel crash when devlink reload during pf initialization [ Upstream commit 93305b77ffcb042f1538ecc383505e87d95aa05a ] The devlink reload process will access the hardware resources, but the register operation is done before the hardware is initialized. So, processing the devlink reload during initialization may lead to kernel crash. This patch fixes this by taking devl_lock during initialization. Fixes: b741269b2759 ("net: hns3: add support for registering devlink for PF") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 27037ce795902..9db363fbc34fd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11604,6 +11604,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_pci_uninit; + devl_lock(hdev->devlink); + /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) @@ -11778,6 +11780,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + devl_unlock(hdev->devlink); return 0; err_mdiobus_unreg: @@ -11790,6 +11793,7 @@ err_msi_uninit: err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_devlink_uninit: + devl_unlock(hdev->devlink); hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); -- GitLab From 0e111ce740fc705a4cde38df41ecc8e61ba0e5ef Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 25 Mar 2024 20:43:11 +0800 Subject: [PATCH 1469/2290] net: hns3: mark unexcuted loopback test result as UNEXECUTED [ Upstream commit 5bd088d6c21a45ee70e6116879310e54174d75eb ] Currently, loopback test may be skipped when resetting, but the test result will still show as 'PASS', because the driver doesn't set ETH_TEST_FL_FAILED flag. Fix it by setting the flag and initializating the value to UNEXECUTED. Fixes: 4c8dab1c709c ("net: hns3: reconstruct function hns3_self_test") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Michal Kubiak Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index e22835ae8a941..9fce976a08f01 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 +#define HNS3_NIC_LB_TEST_UNEXECUTED 4 + +static int hns3_get_sset_count(struct net_device *netdev, int stringset); static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { @@ -419,18 +422,26 @@ static void hns3_do_external_lb(struct net_device *ndev, static void hns3_self_test(struct net_device *ndev, struct ethtool_test *eth_test, u64 *data) { + int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; int st_param[HNAE3_LOOP_NONE][2]; bool if_running = netif_running(ndev); + int i; + + /* initialize the loopback test result, avoid marking an unexcuted + * loopback test as PASS. + */ + for (i = 0; i < cnt; i++) + data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; if (hns3_nic_resetting(ndev)) { netdev_err(ndev, "dev resetting!"); - return; + goto failure; } if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) - return; + goto failure; if (netif_msg_ifdown(h)) netdev_info(ndev, "self test start\n"); @@ -452,6 +463,10 @@ static void hns3_self_test(struct net_device *ndev, if (netif_msg_ifdown(h)) netdev_info(ndev, "self test end\n"); + return; + +failure: + eth_test->flags |= ETH_TEST_FL_FAILED; } static void hns3_update_limit_promisc_mode(struct net_device *netdev, -- GitLab From efb4573feaa54bb3cc04db1d2334d18f4a9aa322 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:45 +0100 Subject: [PATCH 1470/2290] tls: recv: process_rx_list shouldn't use an offset with kvec [ Upstream commit 7608a971fdeb4c3eefa522d1bfe8d4bc6b2481cc ] Only MSG_PEEK needs to copy from an offset during the final process_rx_list call, because the bytes we copied at the beginning of tls_sw_recvmsg were left on the rx_list. In the KVEC case, we removed data from the rx_list as we were copying it, so there's no need to use an offset, just like in the normal case. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/e5487514f828e0347d2b92ca40002c62b58af73d.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bdb5153f3788a..e40f6ed65e6a2 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2238,7 +2238,7 @@ recv_end: } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) + if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else -- GitLab From f52c8f1210da45d905c9d3b7a3ff4ea02c692fd7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:46 +0100 Subject: [PATCH 1471/2290] tls: adjust recv return with async crypto and failed copy to userspace [ Upstream commit 85eef9a41d019b59be7bc91793f26251909c0710 ] process_rx_list may not copy as many bytes as we want to the userspace buffer, for example in case we hit an EFAULT during the copy. If this happens, we should only count the bytes that were actually copied, which may be 0. Subtracting async_copy_bytes is correct in both peek and !peek cases, because decrypted == async_copy_bytes + peeked for the peek case: peek is always !ZC, and we can go through either the sync or async path. In the async case, we add chunk to both decrypted and async_copy_bytes. In the sync case, we add chunk to both decrypted and peeked. I missed that in commit 6caaf104423d ("tls: fix peeking with sync+async decryption"). Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1b5a1eaab3c088a9dd5d9f1059ceecd7afe888d1.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e40f6ed65e6a2..7166c0606527f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2244,6 +2244,9 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; -- GitLab From 30fabe50a7ace3e9d57cf7f9288f33ea408491c8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:48 +0100 Subject: [PATCH 1472/2290] tls: get psock ref after taking rxlock to avoid leak [ Upstream commit 417e91e856099e9b8a42a2520e2255e6afe024be ] At the start of tls_sw_recvmsg, we take a reference on the psock, and then call tls_rx_reader_lock. If that fails, we return directly without releasing the reference. Instead of adding a new label, just take the reference after locking has succeeded, since we don't need it before. Fixes: 4cbc325ed6b4 ("tls: rx: allow only one reader at a time") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/fe2ade22d030051ce4c3638704ed58b67d0df643.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7166c0606527f..348abadbc2d82 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2062,10 +2062,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ -- GitLab From 24444af5ddf729376b90db0f135fa19973cb5dab Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 25 Mar 2024 14:36:27 -0400 Subject: [PATCH 1473/2290] mlxbf_gige: call request_irq() after NAPI initialized [ Upstream commit f7442a634ac06b953fc1f7418f307b25acd4cfbc ] The mlxbf_gige driver encounters a NULL pointer exception in mlxbf_gige_open() when kdump is enabled. The sequence to reproduce the exception is as follows: a) enable kdump b) trigger kdump via "echo c > /proc/sysrq-trigger" c) kdump kernel executes d) kdump kernel loads mlxbf_gige module e) the mlxbf_gige module runs its open() as the the "oob_net0" interface is brought up f) mlxbf_gige module will experience an exception during its open(), something like: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000004 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=00000000e29a4000 [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000086000004 [#1] SMP CPU: 0 PID: 812 Comm: NetworkManager Tainted: G OE 5.15.0-1035-bluefield #37-Ubuntu Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.6.0.13024 Jan 19 2024 pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : __napi_poll+0x40/0x230 sp : ffff800008003e00 x29: ffff800008003e00 x28: 0000000000000000 x27: 00000000ffffffff x26: ffff000066027238 x25: ffff00007cedec00 x24: ffff800008003ec8 x23: 000000000000012c x22: ffff800008003eb7 x21: 0000000000000000 x20: 0000000000000001 x19: ffff000066027238 x18: 0000000000000000 x17: ffff578fcb450000 x16: ffffa870b083c7c0 x15: 0000aaab010441d0 x14: 0000000000000001 x13: 00726f7272655f65 x12: 6769675f6662786c x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa870b0842398 x8 : 0000000000000004 x7 : fe5a48b9069706ea x6 : 17fdb11fc84ae0d2 x5 : d94a82549d594f35 x4 : 0000000000000000 x3 : 0000000000400100 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000066027238 Call trace: 0x0 net_rx_action+0x178/0x360 __do_softirq+0x15c/0x428 __irq_exit_rcu+0xac/0xec irq_exit+0x18/0x2c handle_domain_irq+0x6c/0xa0 gic_handle_irq+0xec/0x1b0 call_on_irq_stack+0x20/0x2c do_interrupt_handler+0x5c/0x70 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x18/0x2c el1h_64_irq+0x7c/0x80 __setup_irq+0x4c0/0x950 request_threaded_irq+0xf4/0x1bc mlxbf_gige_request_irqs+0x68/0x110 [mlxbf_gige] mlxbf_gige_open+0x5c/0x170 [mlxbf_gige] __dev_open+0x100/0x220 __dev_change_flags+0x16c/0x1f0 dev_change_flags+0x2c/0x70 do_setlink+0x220/0xa40 __rtnl_newlink+0x56c/0x8a0 rtnl_newlink+0x58/0x84 rtnetlink_rcv_msg+0x138/0x3c4 netlink_rcv_skb+0x64/0x130 rtnetlink_rcv+0x20/0x30 netlink_unicast+0x2ec/0x360 netlink_sendmsg+0x278/0x490 __sock_sendmsg+0x5c/0x6c ____sys_sendmsg+0x290/0x2d4 ___sys_sendmsg+0x84/0xd0 __sys_sendmsg+0x70/0xd0 __arm64_sys_sendmsg+0x2c/0x40 invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x54/0x184 do_el0_svc+0x30/0xac el0_svc+0x48/0x160 el0t_64_sync_handler+0xa4/0x12c el0t_64_sync+0x1a4/0x1a8 Code: bad PC value ---[ end trace 7d1c3f3bf9d81885 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt Kernel Offset: 0x2870a7a00000 from 0xffff800008000000 PHYS_OFFSET: 0x80000000 CPU features: 0x0,000005c1,a3332a5a Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- The exception happens because there is a pending RX interrupt before the call to request_irq(RX IRQ) executes. Then, the RX IRQ handler fires immediately after this request_irq() completes. The RX IRQ handler runs "napi_schedule()" before NAPI is fully initialized via "netif_napi_add()" and "napi_enable()", both which happen later in the open() logic. The logic in mlxbf_gige_open() must fully initialize NAPI before any calls to request_irq() execute. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Link: https://lore.kernel.org/r/20240325183627.7641-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlxbf_gige/mlxbf_gige_main.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 113e3d9d33530..65e92541db6e5 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -139,13 +139,10 @@ static int mlxbf_gige_open(struct net_device *netdev) control |= MLXBF_GIGE_CONTROL_PORT_EN; writeq(control, priv->base + MLXBF_GIGE_CONTROL); - err = mlxbf_gige_request_irqs(priv); - if (err) - return err; mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); if (err) - goto free_irqs; + return err; /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -166,6 +163,10 @@ static int mlxbf_gige_open(struct net_device *netdev) napi_enable(&priv->napi); netif_start_queue(netdev); + err = mlxbf_gige_request_irqs(priv); + if (err) + goto napi_deinit; + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -182,14 +183,17 @@ static int mlxbf_gige_open(struct net_device *netdev) return 0; +napi_deinit: + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_rx_deinit(priv); + tx_deinit: mlxbf_gige_tx_deinit(priv); phy_deinit: phy_stop(phydev); - -free_irqs: - mlxbf_gige_free_irqs(priv); return err; } -- GitLab From 98cdac206b112bec63852e94802791e316acc2c1 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Tue, 26 Mar 2024 22:42:45 -0400 Subject: [PATCH 1474/2290] bpf: Protect against int overflow for stack access size [ Upstream commit ecc6a2101840177e57c925c102d2d29f260d37c8 ] This patch re-introduces protection against the size of access to stack memory being negative; the access size can appear negative as a result of overflowing its signed int representation. This should not actually happen, as there are other protections along the way, but we should protect against it anyway. One code path was missing such protections (fixed in the previous patch in the series), causing out-of-bounds array accesses in check_stack_range_initialized(). This patch causes the verification of a program with such a non-sensical access size to fail. This check used to exist in a more indirect way, but was inadvertendly removed in a833a17aeac7. Fixes: a833a17aeac7 ("bpf: Fix verification of indirect var-off stack access") Reported-by: syzbot+33f4297b5f927648741a@syzkaller.appspotmail.com Reported-by: syzbot+aafd0513053a1cbf52ef@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAADnVQLORV5PT0iTAhRER+iLBTkByCYNBYyvBSgjN1T31K+gOw@mail.gmail.com/ Acked-by: Andrii Nakryiko Signed-off-by: Andrei Matei Link: https://lore.kernel.org/r/20240327024245.318299-3-andreimatei1@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1a29ac4db6eae..27cc6e3db5a86 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4965,6 +4965,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? */ if (err) { if (tnum_is_const(reg->var_off)) { -- GitLab From 852698c9fd5b177202ef6683565460d807f93ca7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2024 14:13:24 +0000 Subject: [PATCH 1475/2290] cifs: Fix duplicate fscache cookie warnings [ Upstream commit 8876a37277cb832e1861c35f8c661825179f73f5 ] fscache emits a lot of duplicate cookie warnings with cifs because the index key for the fscache cookies does not include everything that the cifs_find_inode() function does. The latter is used with iget5_locked() to distinguish between inodes in the local inode cache. Fix this by adding the creation time and file type to the fscache cookie key. Additionally, add a couple of comments to note that if one is changed the other must be also. Signed-off-by: David Howells Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fscache.c | 16 +++++++++++++++- fs/smb/client/inode.c | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index f64bad513ba6d..6df4ab2a6e5dc 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -12,6 +12,16 @@ #include "cifs_fs_sb.h" #include "cifsproto.h" +/* + * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). + */ +struct cifs_fscache_inode_key { + + __le64 uniqueid; /* server inode number */ + __le64 createtime; /* creation time on server */ + u8 type; /* S_IFMT file type */ +} __packed; + static void cifs_fscache_fill_volume_coherency( struct cifs_tcon *tcon, struct cifs_fscache_volume_coherency_data *cd) @@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) void cifs_fscache_get_inode_cookie(struct inode *inode) { struct cifs_fscache_inode_coherency_data cd; + struct cifs_fscache_inode_key key; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + key.uniqueid = cpu_to_le64(cifsi->uniqueid); + key.createtime = cpu_to_le64(cifsi->createtime); + key.type = (inode->i_mode & S_IFMT) >> 12; cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); cifsi->netfs.cache = fscache_acquire_cookie(tcon->fscache, 0, - &cifsi->uniqueid, sizeof(cifsi->uniqueid), + &key, sizeof(key), &cd, sizeof(cd), i_size_read(&cifsi->netfs.inode)); if (cifsi->netfs.cache) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 5343898bac8a6..634f28f0d331e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1274,6 +1274,8 @@ cifs_find_inode(struct inode *inode, void *opaque) { struct cifs_fattr *fattr = opaque; + /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */ + /* don't match inode with different uniqueid */ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; -- GitLab From 2553bfaa19b4219a8ad0ebbe785bc000cebf8274 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Tue, 26 Mar 2024 12:28:05 +0530 Subject: [PATCH 1476/2290] net: lan743x: Add set RFE read fifo threshold for PCI1x1x chips [ Upstream commit e4a58989f5c839316ac63675e8800b9eed7dbe96 ] PCI11x1x Rev B0 devices might drop packets when receiving back to back frames at 2.5G link speed. Change the B0 Rev device's Receive filtering Engine FIFO threshold parameter from its hardware default of 4 to 3 dwords to prevent the problem. Rev C0 and later hardware already defaults to 3 dwords. Fixes: bb4f6bffe33c ("net: lan743x: Add PCI11010 / PCI11414 device IDs") Signed-off-by: Raju Lakkaraju Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326065805.686128-1-Raju.Lakkaraju@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/microchip/lan743x_main.h | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e804613faa1fc..d5123e8c4a9f4 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -25,6 +25,8 @@ #define PCS_POWER_STATE_DOWN 0x6 #define PCS_POWER_STATE_UP 0x4 +#define RFE_RD_FIFO_TH_3_DWORDS 0x3 + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; @@ -3217,6 +3219,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) lan743x_pci_cleanup(adapter); } +static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) +{ + u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; + + if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { + u32 misc_ctl; + + misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); + misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; + misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, + RFE_RD_FIFO_TH_3_DWORDS); + lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); + } +} + static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { @@ -3232,6 +3249,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, pci11x1x_strap_get_status(adapter); spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); + pci11x1x_set_rfe_rd_fifo_threshold(adapter); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 67877d3b6dd98..d304be17b9d82 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -26,6 +26,7 @@ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) +#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) #define FPGA_REV (0x04) #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) @@ -311,6 +312,9 @@ #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) +#define MISC_CTL_0 (0x920) +#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) + /* Vendor Specific SGMII MMD details */ #define SR_VSMMD_PCS_ID1 0x0004 #define SR_VSMMD_PCS_ID2 0x0005 -- GitLab From 06426737653357e65661b6fb039f535c677fa306 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 26 Mar 2024 10:57:20 +0530 Subject: [PATCH 1477/2290] Octeontx2-af: fix pause frame configuration in GMP mode [ Upstream commit 40d4b4807cadd83fb3f46cc8cd67a945b5b25461 ] The Octeontx2 MAC block (CGX) has separate data paths (SMU and GMP) for different speeds, allowing for efficient data transfer. The previous patch which added pause frame configuration has a bug due to which pause frame feature is not working in GMP mode. This patch fixes the issue by configurating appropriate registers. Fixes: f7e086e754fe ("octeontx2-af: Pause frame configuration at cgx") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326052720.4441-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index e6fe599f7bf3a..254cad45a555f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -814,6 +814,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; -- GitLab From 7d0567842b78390dd9b60f00f1d8f838d540e325 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Mar 2024 11:18:41 +0100 Subject: [PATCH 1478/2290] inet: inet_defrag: prevent sk release while still in use [ Upstream commit 18685451fc4e546fc0e718580d32df3c0e5c8272 ] ip_local_out() and other functions can pass skb->sk as function argument. If the skb is a fragment and reassembly happens before such function call returns, the sk must not be released. This affects skb fragments reassembled via netfilter or similar modules, e.g. openvswitch or ct_act.c, when run as part of tx pipeline. Eric Dumazet made an initial analysis of this bug. Quoting Eric: Calling ip_defrag() in output path is also implying skb_orphan(), which is buggy because output path relies on sk not disappearing. A relevant old patch about the issue was : 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") [..] net/ipv4/ip_output.c depends on skb->sk being set, and probably to an inet socket, not an arbitrary one. If we orphan the packet in ipvlan, then downstream things like FQ packet scheduler will not work properly. We need to change ip_defrag() to only use skb_orphan() when really needed, ie whenever frag_list is going to be used. Eric suggested to stash sk in fragment queue and made an initial patch. However there is a problem with this: If skb is refragmented again right after, ip_do_fragment() will copy head->sk to the new fragments, and sets up destructor to sock_wfree. IOW, we have no choice but to fix up sk_wmem accouting to reflect the fully reassembled skb, else wmem will underflow. This change moves the orphan down into the core, to last possible moment. As ip_defrag_offset is aliased with sk_buff->sk member, we must move the offset into the FRAG_CB, else skb->sk gets clobbered. This allows to delay the orphaning long enough to learn if the skb has to be queued or if the skb is completing the reasm queue. In the former case, things work as before, skb is orphaned. This is safe because skb gets queued/stolen and won't continue past reasm engine. In the latter case, we will steal the skb->sk reference, reattach it to the head skb, and fix up wmem accouting when inet_frag inflates truesize. Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") Diagnosed-by: Eric Dumazet Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 7 +-- net/ipv4/inet_fragment.c | 70 ++++++++++++++++++++----- net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c30d419ebf545..c4a8520dc748f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -745,8 +745,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -870,10 +868,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9f9ac5013a71..834cdc57755f7 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -390,12 +393,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -412,13 +415,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -432,7 +435,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -442,13 +445,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -457,6 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -464,13 +488,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -486,7 +510,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -503,6 +527,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -510,6 +549,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -573,6 +614,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fb153569889ec..6c309c1ec3b0f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -378,6 +378,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -480,7 +481,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 38db0064d6613..87a394179092c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -293,6 +293,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -468,7 +469,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { -- GitLab From 58638e3b48796fae3c5a6a47167c83c471e8a8dd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 Mar 2024 15:30:39 +0100 Subject: [PATCH 1479/2290] dm integrity: fix out-of-range warning [ Upstream commit 8e91c2342351e0f5ef6c0a704384a7f6fc70c3b2 ] Depending on the value of CONFIG_HZ, clang complains about a pointless comparison: drivers/md/dm-integrity.c:4085:12: error: result of comparison of constant 42949672950 with expression of type 'unsigned int' is always false [-Werror,-Wtautological-constant-out-of-range-compare] if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { As the check remains useful for other configurations, shut up the warning by adding a second type cast to uint64_t. Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode") Signed-off-by: Arnd Bergmann Reviewed-by: Mikulas Patocka Reviewed-by: Justin Stitt Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 9c9e2b50c63c3..696365f8f3b5f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4167,7 +4167,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; -- GitLab From 8d029111b809b59a8ba0510330d497481fc28991 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:01:44 +0530 Subject: [PATCH 1480/2290] x86/cpufeatures: Add new word for scattered features [ Upstream commit 7f274e609f3d5f45c22b1dd59053f6764458b492 ] Add a new word for scattered features because all free bits among the existing Linux-defined auxiliary flags have been exhausted. Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/8380d2a0da469a1f0ad75b8954a79fb689599ff6.1711091584.git.sandipan.das@amd.com Stable-dep-of: 598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability") Signed-off-by: Sasha Levin --- arch/x86/include/asm/cpufeature.h | 6 ++++-- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f835b328ba24f..578c3020be7b4 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -96,8 +96,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -121,8 +122,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9a157942ae3dd..09cca23f020eb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 000037078db43..380e963149cc7 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -112,6 +112,7 @@ #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7b..e9187ddd3d1fd 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ -- GitLab From ad141b08d1ce078ad52a00e2eed1589208c54586 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:01:45 +0530 Subject: [PATCH 1481/2290] perf/x86/amd/lbr: Use freeze based on availability [ Upstream commit 598c2fafc06fe5c56a1a415fb7b544b31453d637 ] Currently, the LBR code assumes that LBR Freeze is supported on all processors when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX] bit 1 is set. This is incorrect as the availability of the feature is additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set, which may not be set for all Zen 4 processors. Define a new feature bit for LBR and PMC freeze and set the freeze enable bit (FLBRI) in DebugCtl (MSR 0x1d9) conditionally. It should still be possible to use LBR without freeze for profile-guided optimization of user programs by using an user-only branch filter during profiling. When the user-only filter is enabled, branches are no longer recorded after the transition to CPL 0 upon PMI arrival. When branch entries are read in the PMI handler, the branch stack does not change. E.g. $ perf record -j any,u -e ex_ret_brn_tkn ./workload Since the feature bit is visible under flags in /proc/cpuinfo, it can be used to determine the feasibility of use-cases which require LBR Freeze to be supported by the hardware such as profile-guided optimization of kernels. Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.1711091584.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/core.c | 4 ++-- arch/x86/events/amd/lbr.c | 16 ++++++++++------ arch/x86/include/asm/cpufeatures.h | 8 ++++++++ arch/x86/kernel/cpu/scattered.c | 1 + 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index fd091b9dd7067..3ac069a4559b0 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 38a75216c12cf..b8fe74e8e0a60 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 09cca23f020eb..1280daa729757 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -432,6 +432,14 @@ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ + /* * BUG word(s) */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index fc01f81f6e2a3..94e0a42528dcb 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -46,6 +46,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; -- GitLab From 923579201dece7cb9fc30662b4f6e7d7801042a8 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 5 Mar 2024 18:48:39 +0000 Subject: [PATCH 1482/2290] KVM: arm64: Fix host-programmed guest events in nVHE commit e89c928bedd77d181edc2df01cb6672184775140 upstream. Programming PMU events in the host that count during guest execution is a feature supported by perf, e.g. perf stat -e cpu_cycles:G ./lkvm run While this works for VHE, the guest/host event bitmaps are not carried through to the hypervisor in the nVHE configuration. Make kvm_pmu_update_vcpu_events() conditional on whether or not _hardware_ supports PMUv3 rather than if the vCPU as vPMU enabled. Cc: stable@vger.kernel.org Fixes: 84d751a019a9 ("KVM: arm64: Pass pmu events to hyp via vcpu") Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240305184840.636212-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Greg Kroah-Hartman --- include/kvm/arm_pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 96b192139a23a..6196b71c5eb58 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -85,7 +85,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); */ #define kvm_pmu_update_vcpu_events(vcpu) \ do { \ - if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ + if (!has_vhe() && kvm_arm_support_pmu_v3()) \ vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) -- GitLab From 3d61f1704bdf2b4335336590baac0e755b06fd71 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 30 Mar 2024 12:49:02 +0100 Subject: [PATCH 1483/2290] r8169: fix issue caused by buggy BIOS on certain boards with RTL8168d commit 5d872c9f46bd2ea3524af3c2420a364a13667135 upstream. On some boards with this chip version the BIOS is buggy and misses to reset the PHY page selector. This results in the PHY ID read accessing registers on a different page, returning a more or less random value. Fix this by resetting the page selector first. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/64f2055e-98b8-45ec-8568-665e3d54d4e6@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 06663c11ca96d..85ed9879af6ca 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5032,6 +5032,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) struct mii_bus *new_bus; int ret; + /* On some boards with this chip version the BIOS is buggy and misses + * to reset the PHY page selector. This results in the PHY ID read + * accessing registers on a different page, returning a more or + * less random value. Fix this by resetting the page selector first. + */ + if (tp->mac_version == RTL_GIGA_MAC_VER_25 || + tp->mac_version == RTL_GIGA_MAC_VER_26) + r8169_mdio_write(tp, 0x1f, 0); + new_bus = devm_mdiobus_alloc(&pdev->dev); if (!new_bus) return -ENOMEM; -- GitLab From 5d920886c38209a6b321180b55f722f8d6aa1d32 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Apr 2024 17:16:14 -0700 Subject: [PATCH 1484/2290] x86/cpufeatures: Add CPUID_LNX_5 to track recently added Linux-defined word commit 8cb4a9a82b21623dbb4b3051dd30d98356cf95bc upstream. Add CPUID_LNX_5 to track cpufeatures' word 21, and add the appropriate compile-time assert in KVM to prevent direct lookups on the features in CPUID_LNX_5. KVM uses X86_FEATURE_* flags to manage guest CPUID, and so must translate features that are scattered by Linux from the Linux-defined bit to the hardware-defined bit, i.e. should never try to directly access scattered features in guest CPUID. Opportunistically add NR_CPUID_WORDS to enum cpuid_leafs, along with a compile-time assert in KVM's CPUID infrastructure to ensure that future additions update cpuid_leafs along with NCAPINTS. No functional change intended. Fixes: 7f274e609f3d ("x86/cpufeatures: Add new word for scattered features") Cc: Sandipan Das Signed-off-by: Sean Christopherson Acked-by: Dave Hansen Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/kvm/reverse_cpuid.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 578c3020be7b4..16051c6f3b13d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 7c8e2b20a13b0..d2f6703a2633e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -83,10 +83,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } -- GitLab From 9bf4acc802966f07243777c3018400cc26cccbe4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 14 Mar 2024 09:44:12 +0100 Subject: [PATCH 1485/2290] Revert "Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT" commit 4790a73ace86f3d165bbedba898e0758e6e1b82d upstream. This reverts commit 7dcd3e014aa7faeeaf4047190b22d8a19a0db696. Qualcomm Bluetooth controllers like WCN6855 do not have persistent storage for the Bluetooth address and must therefore start as unconfigured to allow the user to set a valid address unless one has been provided by the boot firmware in the devicetree. A recent change snuck into v6.8-rc7 and incorrectly started marking the default (non-unique) address as valid. This specifically also breaks the Bluetooth setup for some user of the Lenovo ThinkPad X13s. Note that this is the second time Qualcomm breaks the driver this way and that this was fixed last year by commit 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk"), which also has some further details. Fixes: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Cc: stable@vger.kernel.org # 6.8 Cc: Janaki Ramaiah Thota Signed-off-by: Johan Hovold Reported-by: Clayton Craft Tested-by: Clayton Craft Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 2acda547f4f3e..63f7f58de49f0 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,7 +7,6 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1845,17 +1844,7 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - - /* Set BDA quirk bit for reading BDA value from fwnode property - * only if that property exist in DT. - */ - if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); - } else { - bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); - } - + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); -- GitLab From 298dc5dd38d2652cd6b362e3295be2d81f244a19 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:52 +0100 Subject: [PATCH 1486/2290] arm64: dts: qcom: sc7180-trogdor: mark bluetooth address as broken commit e12e28009e584c8f8363439f6a928ec86278a106 upstream. Several Qualcomm Bluetooth controllers lack persistent storage for the device address and instead one can be provided by the boot firmware using the 'local-bd-address' devicetree property. The Bluetooth bindings clearly states that the address should be specified in little-endian order, but due to a long-standing bug in the Qualcomm driver which reversed the address some boot firmware has been providing the address in big-endian order instead. The boot firmware in SC7180 Trogdor Chromebooks is known to be affected so mark the 'local-bd-address' property as broken to maintain backwards compatibility with older firmware when fixing the underlying driver bug. Note that ChromeOS always updates the kernel and devicetree in lockstep so that there is no need to handle backwards compatibility with older devicetrees. Fixes: 7ec3e67307f8 ("arm64: dts: qcom: sc7180-trogdor: add initial trogdor and lazor dt") Cc: stable@vger.kernel.org # 5.10 Cc: Rob Clark Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Acked-by: Bjorn Andersson Reviewed-by: Bjorn Andersson Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index eae22e6e97c15..f55ce6f2fdc28 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -923,6 +923,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; -- GitLab From a2812ff7ea40b53480835cb22dbcf2b40f19954a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:54 +0100 Subject: [PATCH 1487/2290] Bluetooth: qca: fix device-address endianness commit 77f45cca8bc55d00520a192f5a7715133591c83e upstream. The WCN6855 firmware on the Lenovo ThinkPad X13s expects the Bluetooth device address in big-endian order when setting it using the EDL_WRITE_BD_ADDR_OPCODE command. Presumably, this is the case for all non-ROME devices which all use the EDL_WRITE_BD_ADDR_OPCODE command for this (unlike the ROME devices which use a different command and expect the address in little-endian order). Reverse the little-endian address before setting it to make sure that the address can be configured using tools like btmgmt or using the 'local-bd-address' devicetree property. Note that this can potentially break systems with boot firmware which has started relying on the broken behaviour and is incorrectly passing the address via devicetree in big-endian order. The only device affected by this should be the WCN3991 used in some Chromebooks. As ChromeOS updates the kernel and devicetree in lockstep, the new 'qcom,local-bd-address-broken' property can be used to determine if the firmware is buggy so that the underlying driver bug can be fixed without breaking backwards compatibility. Set the HCI_QUIRK_BDADDR_PROPERTY_BROKEN quirk for such platforms so that the address is reversed when parsing the address property. Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address") Cc: stable@vger.kernel.org # 5.1 Cc: Balakrishna Godavarthi Cc: Matthias Kaehlcke Tested-by: Nikita Travkin # sc7180 Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 8 ++++++-- drivers/bluetooth/hci_qca.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 0211f704a358b..5277090c6d6d7 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -758,11 +758,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { + bdaddr_t bdaddr_swapped; struct sk_buff *skb; int err; - skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, - HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + baswap(&bdaddr_swapped, bdaddr); + + skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, + &bdaddr_swapped, HCI_EV_VENDOR, + HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 63f7f58de49f0..33956ddec9337 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -225,6 +225,7 @@ struct qca_serdev { struct qca_power *bt_power; u32 init_speed; u32 oper_speed; + bool bdaddr_property_broken; const char *firmware_name; }; @@ -1787,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + struct qca_serdev *qcadev; const char *soc_name; ret = qca_check_speeds(hu); @@ -1845,6 +1847,11 @@ retry: case QCA_WCN6855: case QCA_WCN7850: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + qcadev = serdev_device_get_drvdata(hu->serdev); + if (qcadev->bdaddr_property_broken) + set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); @@ -2212,6 +2219,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); + qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, + "qcom,local-bd-address-broken"); + if (data) qcadev->btsoc_type = data->soc_type; else -- GitLab From 3e773d04aef9c9e0cdf97cfd23bc82c52305e0a7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:53 +0100 Subject: [PATCH 1488/2290] Bluetooth: add quirk for broken address properties commit 39646f29b100566451d37abc4cc8cdd583756dfe upstream. Some Bluetooth controllers lack persistent storage for the device address and instead one can be provided by the boot firmware using the 'local-bd-address' devicetree property. The Bluetooth devicetree bindings clearly states that the address should be specified in little-endian order, but due to a long-standing bug in the Qualcomm driver which reversed the address some boot firmware has been providing the address in big-endian order instead. Add a new quirk that can be set on platforms with broken firmware and use it to reverse the address when parsing the property so that the underlying driver bug can be fixed. Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address") Cc: stable@vger.kernel.org # 5.1 Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci.h | 9 +++++++++ net/bluetooth/hci_sync.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c69e09909449f..09bc4bf805c62 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -175,6 +175,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 7e64cf880f9f1..e24b211b10ff5 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3293,7 +3293,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - bacpy(&hdev->public_addr, &ba); + if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + baswap(&hdev->public_addr, &ba); + else + bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { -- GitLab From 38e3eaa861bde62b92e207b07a9dc595a784191f Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 27 Mar 2024 12:30:30 +0800 Subject: [PATCH 1489/2290] Bluetooth: hci_event: set the conn encrypted before conn establishes commit c569242cd49287d53b73a94233db40097d838535 upstream. We have a BT headset (Lenovo Thinkplus XT99), the pairing and connecting has no problem, once this headset is paired, bluez will remember this device and will auto re-connect it whenever the device is powered on. The auto re-connecting works well with Windows and Android, but with Linux, it always fails. Through debugging, we found at the rfcomm connection stage, the bluetooth stack reports "Connection refused - security block (0x0003)". For this device, the re-connecting negotiation process is different from other BT headsets, it sends the Link_KEY_REQUEST command before the CONNECT_REQUEST completes, and it doesn't send ENCRYPT_CHANGE command during the negotiation. When the device sends the "connect complete" to hci, the ev->encr_mode is 1. So here in the conn_complete_evt(), if ev->encr_mode is 1, link type is ACL and HCI_CONN_ENCRYPT is not set, we set HCI_CONN_ENCRYPT to this conn, and update conn->enc_key_size accordingly. After this change, this BT headset could re-connect with Linux successfully. This is the btmon log after applying the patch, after receiving the "Connect Complete" with "Encryption: Enabled", will send the command to read encryption key size: > HCI Event: Connect Request (0x04) plen 10 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) Class: 0x240404 Major class: Audio/Video (headset, speaker, stereo, video, vcr) Minor class: Wearable Headset Device Rendering (Printing, Speaker) Audio (Speaker, Microphone, Headset) Link type: ACL (0x01) ... > HCI Event: Link Key Request (0x17) plen 6 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) < HCI Command: Link Key Request Reply (0x01|0x000b) plen 22 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) Link key: ${32-hex-digits-key} ... > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 256 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) Link type: ACL (0x01) Encryption: Enabled (0x01) < HCI Command: Read Encryption Key... (0x05|0x0008) plen 2 Handle: 256 < ACL Data TX: Handle 256 flags 0x00 dlen 10 L2CAP: Information Request (0x0a) ident 1 len 2 Type: Extended features supported (0x0002) > HCI Event: Command Complete (0x0e) plen 7 Read Encryption Key Size (0x05|0x0008) ncmd 1 Status: Success (0x00) Handle: 256 Key size: 16 Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/704 Reviewed-by: Paul Menzel Reviewed-by: Luiz Augusto von Dentz Signed-off-by: Hui Wang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b150dee88f35c..bc14223f66937 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3234,6 +3234,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (test_bit(HCI_ENCRYPT, &hdev->flags)) set_bit(HCI_CONN_ENCRYPT, &conn->flags); + /* "Link key request" completed ahead of "connect request" completes */ + if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && + ev->link_type == ACL_LINK) { + struct link_key *key; + struct hci_cp_read_enc_key_size cp; + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (key) { + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + cp.handle = cpu_to_le16(conn->handle); + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { + bt_dev_err(hdev, "sending read key size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } + } + + hci_encrypt_cfm(conn, ev->status); + } + } + /* Get remote features */ if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; -- GitLab From 18e189442a5896255e764f8e875c13d16248ef2f Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 27 Mar 2024 15:24:56 +0100 Subject: [PATCH 1490/2290] Bluetooth: Fix TOCTOU in HCI debugfs implementation commit 7835fcfd132eb88b87e8eb901f88436f63ab60f7 upstream. struct hci_dev members conn_info_max_age, conn_info_min_age, le_conn_max_interval, le_conn_min_interval, le_adv_max_interval, and le_adv_min_interval can be modified from the HCI core code, as well through debugfs. The debugfs implementation, that's only available to privileged users, will check for boundaries, making sure that the minimum value being set is strictly above the maximum value that already exists, and vice-versa. However, as both minimum and maximum values can be changed concurrently to us modifying them, we need to make sure that the value we check is the value we end up using. For example, with ->conn_info_max_age set to 10, conn_info_min_age_set() gets called from vfs handlers to set conn_info_min_age to 8. In conn_info_min_age_set(), this goes through: if (val == 0 || val > hdev->conn_info_max_age) return -EINVAL; Concurrently, conn_info_max_age_set() gets called to set to set the conn_info_max_age to 7: if (val == 0 || val > hdev->conn_info_max_age) return -EINVAL; That check will also pass because we used the old value (10) for conn_info_max_age. After those checks that both passed, the struct hci_dev access is mutex-locked, disabling concurrent access, but that does not matter because the invalid value checks both passed, and we'll end up with conn_info_min_age = 8 and conn_info_max_age = 7 To fix this problem, we need to lock the structure access before so the check and assignment are not interrupted. This fix was originally devised by the BassCheck[1] team, and considered the problem to be an atomicity one. This isn't the case as there aren't any concerns about the variable changing while we check it, but rather after we check it parallel to another change. This patch fixes CVE-2024-24858 and CVE-2024-24857. [1] https://sites.google.com/view/basscheck/ Co-developed-by: Gui-Dong Han <2045gemini@gmail.com> Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Link: https://lore.kernel.org/linux-bluetooth/20231222161317.6255-1-2045gemini@gmail.com/ Link: https://nvd.nist.gov/vuln/detail/CVE-2024-24858 Link: https://lore.kernel.org/linux-bluetooth/20231222162931.6553-1-2045gemini@gmail.com/ Link: https://lore.kernel.org/linux-bluetooth/20231222162310.6461-1-2045gemini@gmail.com/ Link: https://nvd.nist.gov/vuln/detail/CVE-2024-24857 Fixes: 31ad169148df ("Bluetooth: Add conn info lifetime parameters to debugfs") Fixes: 729a1051da6f ("Bluetooth: Expose default LE advertising interval via debugfs") Fixes: 71c3b60ec6d2 ("Bluetooth: Move BR/EDR debugfs file creation into hci_debugfs.c") Signed-off-by: Bastien Nocera Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_debugfs.c | 48 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 6124b3425f351..c9400a7d93d7b 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -217,10 +217,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val > hdev->conn_info_max_age) + hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_min_age = val; hci_dev_unlock(hdev); @@ -245,10 +247,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) + hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -566,10 +570,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -594,10 +600,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -849,10 +857,12 @@ static int conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -877,10 +887,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -989,10 +1001,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -1017,10 +1031,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); -- GitLab From 7c1250796b6c262b505a46192f4716b8c6a6a8c6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 27 Mar 2024 13:14:56 +0100 Subject: [PATCH 1491/2290] xen-netfront: Add missing skb_mark_for_recycle commit 037965402a010898d34f4e35327d22c0a95cd51f upstream. Notice that skb_mark_for_recycle() is introduced later than fixes tag in commit 6a5bcd84e886 ("page_pool: Allow drivers to hint on SKB recycling"). It is believed that fixes tag were missing a call to page_pool_release_page() between v5.9 to v5.14, after which is should have used skb_mark_for_recycle(). Since v6.6 the call page_pool_release_page() were removed (in commit 535b9c61bdef ("net: page_pool: hide page_pool_release_page()") and remaining callers converted (in commit 6bfef2ec0172 ("Merge branch 'net-page_pool-remove-page_pool_release_page'")). This leak became visible in v6.8 via commit dba1b8a7ab68 ("mm/page_pool: catch page_pool memory leaks"). Cc: stable@vger.kernel.org Fixes: 6c5aa6fc4def ("xen networking: add basic XDP support for xen-netfront") Reported-by: Leonidas Spyropoulos Link: https://bugzilla.kernel.org/show_bug.cgi?id=218654 Reported-by: Arthur Borsboom Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/171154167446.2671062.9127105384591237363.stgit@firesoul Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index dc404e05970cd..95b5ab4b964e2 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) return NULL; } skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + skb_mark_for_recycle(skb); /* Align ip header to a 16 bytes boundary */ skb_reserve(skb, NET_IP_ALIGN); -- GitLab From cbaac2e5488ed54833897264a5ffb2a341a9f196 Mon Sep 17 00:00:00 2001 From: Mahmoud Adam Date: Tue, 26 Mar 2024 16:31:33 +0100 Subject: [PATCH 1492/2290] net/rds: fix possible cp null dereference commit 62fc3357e079a07a22465b9b6ef71bb6ea75ee4b upstream. cp might be null, calling cp->cp_conn would produce null dereference [Simon Horman adds:] Analysis: * cp is a parameter of __rds_rdma_map and is not reassigned. * The following call-sites pass a NULL cp argument to __rds_rdma_map() - rds_get_mr() - rds_get_mr_for_dest * Prior to the code above, the following assumes that cp may be NULL (which is indicative, but could itself be unnecessary) trans_private = rs->rs_transport->get_mr( sg, nents, rs, &mr->r_key, cp ? cp->cp_conn : NULL, args->vec.addr, args->vec.bytes, need_odp ? ODP_ZEROBASED : ODP_NOT_NEEDED); * The code modified by this patch is guarded by IS_ERR(trans_private), where trans_private is assigned as per the previous point in this analysis. The only implementation of get_mr that I could locate is rds_ib_get_mr() which can return an ERR_PTR if the conn (4th) argument is NULL. * ret is set to PTR_ERR(trans_private). rds_ib_get_mr can return ERR_PTR(-ENODEV) if the conn (4th) argument is NULL. Thus ret may be -ENODEV in which case the code in question will execute. Conclusion: * cp may be NULL at the point where this patch adds a check; this patch does seem to address a possible bug Fixes: c055fc00c07b ("net/rds: fix WARNING in rds_conn_connect_if_down") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mahmoud Adam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326153132.55580-1-mngyadam@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a4e3c5de998be..00dbcd4d28e68 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, } ret = PTR_ERR(trans_private); /* Trigger connection so that its ready for the next retry */ - if (ret == -ENODEV) + if (ret == -ENODEV && cp) rds_conn_connect_if_down(cp->cp_conn); goto out; } -- GitLab From fc77240f6316d17fc58a8881927c3732b1d75d51 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Wed, 3 Apr 2024 15:21:58 +0200 Subject: [PATCH 1493/2290] net: usb: ax88179_178a: avoid the interface always configured as random address commit 2e91bb99b9d4f756e92e83c4453f894dda220f09 upstream. After the commit d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets"), reset is not executed from bind operation and mac address is not read from the device registers or the devicetree at that moment. Since the check to configure if the assigned mac address is random or not for the interface, happens after the bind operation from usbnet_probe, the interface keeps configured as random address, although the address is correctly read and set during open operation (the only reset now). In order to keep only one reset for the device and to avoid the interface always configured as random address, after reset, configure correctly the suitable field from the driver, if the mac address is read successfully from the device registers or the devicetree. Take into account if a locally administered address (random) was previously stored. cc: stable@vger.kernel.org # 6.6+ Fixes: d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets") Reported-by: Dave Stevenson Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240403132158.344838-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index d837c18874161..e0e9b4c53cb02 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) if (is_valid_ether_addr(mac)) { eth_hw_addr_set(dev->net, mac); + if (!is_local_ether_addr(mac)) + dev->net->addr_assign_type = NET_ADDR_PERM; } else { netdev_info(dev->net, "invalid MAC address, using random\n"); eth_hw_addr_random(dev->net); -- GitLab From db388b8e12aa7c3660617cc5e8beb90f71bba206 Mon Sep 17 00:00:00 2001 From: Marco Pinna Date: Fri, 29 Mar 2024 17:12:59 +0100 Subject: [PATCH 1494/2290] vsock/virtio: fix packet delivery to tap device commit b32a09ea7c38849ff925489a6bf5bd8914bc45df upstream. Commit 82dfb540aeb2 ("VSOCK: Add virtio vsock vsockmon hooks") added virtio_transport_deliver_tap_pkt() for handing packets to the vsockmon device. However, in virtio_transport_send_pkt_work(), the function is called before actually sending the packet (i.e. before placing it in the virtqueue with virtqueue_add_sgs() and checking whether it returned successfully). Queuing the packet in the virtqueue can fail even multiple times. However, in virtio_transport_deliver_tap_pkt() we deliver the packet to the monitoring tap interface only the first time we call it. This certainly avoids seeing the same packet replicated multiple times in the monitoring interface, but it can show the packet sent with the wrong timestamp or even before we succeed to queue it in the virtqueue. Move virtio_transport_deliver_tap_pkt() after calling virtqueue_add_sgs() and making sure it returned successfully. Fixes: 82dfb540aeb2 ("VSOCK: Add virtio vsock vsockmon hooks") Cc: stable@vge.kernel.org Signed-off-by: Marco Pinna Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20240329161259.411751-1-marco.pinn95@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 16575ea836590..5434c9f11d28d 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -109,7 +109,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) if (!skb) break; - virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); @@ -128,6 +127,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) break; } + virtio_transport_deliver_tap_pkt(skb); + if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; -- GitLab From 1c9e71ca615debed700f665ba434e22dbbfd9cd6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Mar 2024 11:47:51 +0100 Subject: [PATCH 1495/2290] Revert "x86/mm/ident_map: Use gbpages only where full GB page should be mapped." commit c567f2948f57bdc03ed03403ae0234085f376b7d upstream. This reverts commit d794734c9bbfe22f86686dc2909c25f5ffe1a572. While the original change tries to fix a bug, it also unintentionally broke existing systems, see the regressions reported at: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjoseph.com/ Since d794734c9bbf was also marked for -stable, let's back it out before causing more damage. Note that due to another upstream change the revert was not 100% automatic: 0a845e0f6348 mm/treewide: replace pud_large() with pud_leaf() Signed-off-by: Ingo Molnar Cc: Cc: Russ Anderson Cc: Steve Wahl Cc: Dave Hansen Link: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjoseph.com/ Fixes: d794734c9bbf ("x86/mm/ident_map: Use gbpages only where full GB page should be mapped.") Signed-off-by: Steve Wahl Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/ident_map.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index f50cc210a9818..968d7005f4a72 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_large(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; -- GitLab From 745cf6a843896cdac8766c74379300ed73c78830 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 1 Apr 2024 00:33:02 +0200 Subject: [PATCH 1496/2290] netfilter: nf_tables: reject new basechain after table flag update commit 994209ddf4f430946f6247616b2e33d179243769 upstream. When dormant flag is toggled, hooks are disabled in the commit phase by iterating over current chains in table (existing and new). The following configuration allows for an inconsistent state: add table x add chain x y { type filter hook input priority 0; } add table x { flags dormant; } add chain x w { type filter hook input priority 1; } which triggers the following warning when trying to unregister chain w which is already unregistered. [ 127.322252] WARNING: CPU: 7 PID: 1211 at net/netfilter/core.c:50 1 __nf_unregister_net_hook+0x21a/0x260 [...] [ 127.322519] Call Trace: [ 127.322521] [ 127.322524] ? __warn+0x9f/0x1a0 [ 127.322531] ? __nf_unregister_net_hook+0x21a/0x260 [ 127.322537] ? report_bug+0x1b1/0x1e0 [ 127.322545] ? handle_bug+0x3c/0x70 [ 127.322552] ? exc_invalid_op+0x17/0x40 [ 127.322556] ? asm_exc_invalid_op+0x1a/0x20 [ 127.322563] ? kasan_save_free_info+0x3b/0x60 [ 127.322570] ? __nf_unregister_net_hook+0x6a/0x260 [ 127.322577] ? __nf_unregister_net_hook+0x21a/0x260 [ 127.322583] ? __nf_unregister_net_hook+0x6a/0x260 [ 127.322590] ? __nf_tables_unregister_hook+0x8a/0xe0 [nf_tables] [ 127.322655] nft_table_disable+0x75/0xf0 [nf_tables] [ 127.322717] nf_tables_commit+0x2571/0x2620 [nf_tables] Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2a5d9075a081d..a7f84fe96de8e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2372,6 +2372,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; -- GitLab From 4e8447a9a3d367b5065a0b7abe101da6e0037b6e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Apr 2024 18:04:36 +0200 Subject: [PATCH 1497/2290] netfilter: nf_tables: flush pending destroy work before exit_net release commit 24cea9677025e0de419989ecb692acd4bb34cac2 upstream. Similar to 2c9f0293280e ("netfilter: nf_tables: flush pending destroy work before netlink notifier") to address a race between exit_net and the destroy workqueue. The trace below shows an element to be released via destroy workqueue while exit_net path (triggered via module removal) has already released the set that is used in such transaction. [ 1360.547789] BUG: KASAN: slab-use-after-free in nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.547861] Read of size 8 at addr ffff888140500cc0 by task kworker/4:1/152465 [ 1360.547870] CPU: 4 PID: 152465 Comm: kworker/4:1 Not tainted 6.8.0+ #359 [ 1360.547882] Workqueue: events nf_tables_trans_destroy_work [nf_tables] [ 1360.547984] Call Trace: [ 1360.547991] [ 1360.547998] dump_stack_lvl+0x53/0x70 [ 1360.548014] print_report+0xc4/0x610 [ 1360.548026] ? __virt_addr_valid+0xba/0x160 [ 1360.548040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 1360.548054] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548176] kasan_report+0xae/0xe0 [ 1360.548189] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548312] nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548447] ? __pfx_nf_tables_trans_destroy_work+0x10/0x10 [nf_tables] [ 1360.548577] ? _raw_spin_unlock_irq+0x18/0x30 [ 1360.548591] process_one_work+0x2f1/0x670 [ 1360.548610] worker_thread+0x4d3/0x760 [ 1360.548627] ? __pfx_worker_thread+0x10/0x10 [ 1360.548640] kthread+0x16b/0x1b0 [ 1360.548653] ? __pfx_kthread+0x10/0x10 [ 1360.548665] ret_from_fork+0x2f/0x50 [ 1360.548679] ? __pfx_kthread+0x10/0x10 [ 1360.548690] ret_from_fork_asm+0x1a/0x30 [ 1360.548707] [ 1360.548719] Allocated by task 192061: [ 1360.548726] kasan_save_stack+0x20/0x40 [ 1360.548739] kasan_save_track+0x14/0x30 [ 1360.548750] __kasan_kmalloc+0x8f/0xa0 [ 1360.548760] __kmalloc_node+0x1f1/0x450 [ 1360.548771] nf_tables_newset+0x10c7/0x1b50 [nf_tables] [ 1360.548883] nfnetlink_rcv_batch+0xbc4/0xdc0 [nfnetlink] [ 1360.548909] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] [ 1360.548927] netlink_unicast+0x367/0x4f0 [ 1360.548935] netlink_sendmsg+0x34b/0x610 [ 1360.548944] ____sys_sendmsg+0x4d4/0x510 [ 1360.548953] ___sys_sendmsg+0xc9/0x120 [ 1360.548961] __sys_sendmsg+0xbe/0x140 [ 1360.548971] do_syscall_64+0x55/0x120 [ 1360.548982] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 1360.548994] Freed by task 192222: [ 1360.548999] kasan_save_stack+0x20/0x40 [ 1360.549009] kasan_save_track+0x14/0x30 [ 1360.549019] kasan_save_free_info+0x3b/0x60 [ 1360.549028] poison_slab_object+0x100/0x180 [ 1360.549036] __kasan_slab_free+0x14/0x30 [ 1360.549042] kfree+0xb6/0x260 [ 1360.549049] __nft_release_table+0x473/0x6a0 [nf_tables] [ 1360.549131] nf_tables_exit_net+0x170/0x240 [nf_tables] [ 1360.549221] ops_exit_list+0x50/0xa0 [ 1360.549229] free_exit_list+0x101/0x140 [ 1360.549236] unregister_pernet_operations+0x107/0x160 [ 1360.549245] unregister_pernet_subsys+0x1c/0x30 [ 1360.549254] nf_tables_module_exit+0x43/0x80 [nf_tables] [ 1360.549345] __do_sys_delete_module+0x253/0x370 [ 1360.549352] do_syscall_64+0x55/0x120 [ 1360.549360] entry_SYSCALL_64_after_hwframe+0x55/0x5d (gdb) list *__nft_release_table+0x473 0x1e033 is in __nft_release_table (net/netfilter/nf_tables_api.c:11354). 11349 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { 11350 list_del(&flowtable->list); 11351 nft_use_dec(&table->use); 11352 nf_tables_flowtable_destroy(flowtable); 11353 } 11354 list_for_each_entry_safe(set, ns, &table->sets, list) { 11355 list_del(&set->list); 11356 nft_use_dec(&table->use); 11357 if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) 11358 nft_map_deactivate(&ctx, set); (gdb) [ 1360.549372] Last potentially related work creation: [ 1360.549376] kasan_save_stack+0x20/0x40 [ 1360.549384] __kasan_record_aux_stack+0x9b/0xb0 [ 1360.549392] __queue_work+0x3fb/0x780 [ 1360.549399] queue_work_on+0x4f/0x60 [ 1360.549407] nft_rhash_remove+0x33b/0x340 [nf_tables] [ 1360.549516] nf_tables_commit+0x1c6a/0x2620 [nf_tables] [ 1360.549625] nfnetlink_rcv_batch+0x728/0xdc0 [nfnetlink] [ 1360.549647] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] [ 1360.549671] netlink_unicast+0x367/0x4f0 [ 1360.549680] netlink_sendmsg+0x34b/0x610 [ 1360.549690] ____sys_sendmsg+0x4d4/0x510 [ 1360.549697] ___sys_sendmsg+0xc9/0x120 [ 1360.549706] __sys_sendmsg+0xbe/0x140 [ 1360.549715] do_syscall_64+0x55/0x120 [ 1360.549725] entry_SYSCALL_64_after_hwframe+0x55/0x5d Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a7f84fe96de8e..6483186a48f6c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10981,6 +10981,7 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); -- GitLab From 9b5b7708ec2be21dd7ef8ca0e3abe4ae9f3b083b Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 3 Apr 2024 15:22:04 +0800 Subject: [PATCH 1498/2290] netfilter: nf_tables: Fix potential data-race in __nft_flowtable_type_get() commit 24225011d81b471acc0e1e315b7d9905459a6304 upstream. nft_unregister_flowtable_type() within nf_flow_inet_module_exit() can concurrent with __nft_flowtable_type_get() within nf_tables_newflowtable(). And thhere is not any protection when iterate over nf_tables_flowtables list in __nft_flowtable_type_get(). Therefore, there is pertential data-race of nf_tables_flowtables list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_flowtables list in __nft_flowtable_type_get(), and use rcu_read_lock() in the caller nft_flowtable_type_get() to protect the entire type query process. Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6483186a48f6c..8d38cd5047692 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7841,11 +7841,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, return err; } +/* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; - list_for_each_entry(type, &nf_tables_flowtables, list) { + list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } @@ -7857,9 +7858,13 @@ nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; + rcu_read_lock(); type = __nft_flowtable_type_get(family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES -- GitLab From 18aae2cb87e5faa9c5bd865260ceadac60d5a6c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Apr 2024 12:20:51 +0000 Subject: [PATCH 1499/2290] netfilter: validate user input for expected length commit 0c83842df40f86e529db6842231154772c20edcc upstream. I got multiple syzbot reports showing old bugs exposed by BPF after commit 20f2505fb436 ("bpf: Try to avoid kzalloc in cgroup/{s,g}etsockopt") setsockopt() @optlen argument should be taken into account before copying data. BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] BUG: KASAN: slab-out-of-bounds in do_replace net/ipv4/netfilter/ip_tables.c:1111 [inline] BUG: KASAN: slab-out-of-bounds in do_ipt_set_ctl+0x902/0x3dd0 net/ipv4/netfilter/ip_tables.c:1627 Read of size 96 at addr ffff88802cd73da0 by task syz-executor.4/7238 CPU: 1 PID: 7238 Comm: syz-executor.4 Not tainted 6.9.0-rc2-next-20240403-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 __asan_memcpy+0x29/0x70 mm/kasan/shadow.c:105 copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] copy_from_sockptr include/linux/sockptr.h:55 [inline] do_replace net/ipv4/netfilter/ip_tables.c:1111 [inline] do_ipt_set_ctl+0x902/0x3dd0 net/ipv4/netfilter/ip_tables.c:1627 nf_setsockopt+0x295/0x2c0 net/netfilter/nf_sockopt.c:101 do_sock_setsockopt+0x3af/0x720 net/socket.c:2311 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x72/0x7a RIP: 0033:0x7fd22067dde9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fd21f9ff0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007fd2207abf80 RCX: 00007fd22067dde9 RDX: 0000000000000040 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007fd2206ca47a R08: 0000000000000001 R09: 0000000000000000 R10: 0000000020000880 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fd2207abf80 R15: 00007ffd2d0170d8 Allocated by task 7238: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:370 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:4069 [inline] __kmalloc_noprof+0x200/0x410 mm/slub.c:4082 kmalloc_noprof include/linux/slab.h:664 [inline] __cgroup_bpf_run_filter_setsockopt+0xd47/0x1050 kernel/bpf/cgroup.c:1869 do_sock_setsockopt+0x6b4/0x720 net/socket.c:2293 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x72/0x7a The buggy address belongs to the object at ffff88802cd73da0 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 0 bytes inside of allocated 1-byte region [ffff88802cd73da0, ffff88802cd73da1) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88802cd73020 pfn:0x2cd73 flags: 0xfff80000000000(node=0|zone=1|lastcpupid=0xfff) page_type: 0xffffefff(slab) raw: 00fff80000000000 ffff888015041280 dead000000000100 dead000000000122 raw: ffff88802cd73020 000000008080007f 00000001ffffefff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 5103, tgid 2119833701 (syz-executor.4), ts 5103, free_ts 70804600828 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1490 prep_new_page mm/page_alloc.c:1498 [inline] get_page_from_freelist+0x2e7e/0x2f40 mm/page_alloc.c:3454 __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4712 __alloc_pages_node_noprof include/linux/gfp.h:244 [inline] alloc_pages_node_noprof include/linux/gfp.h:271 [inline] alloc_slab_page+0x5f/0x120 mm/slub.c:2249 allocate_slab+0x5a/0x2e0 mm/slub.c:2412 new_slab mm/slub.c:2465 [inline] ___slab_alloc+0xcd1/0x14b0 mm/slub.c:3615 __slab_alloc+0x58/0xa0 mm/slub.c:3705 __slab_alloc_node mm/slub.c:3758 [inline] slab_alloc_node mm/slub.c:3936 [inline] __do_kmalloc_node mm/slub.c:4068 [inline] kmalloc_node_track_caller_noprof+0x286/0x450 mm/slub.c:4089 kstrdup+0x3a/0x80 mm/util.c:62 device_rename+0xb5/0x1b0 drivers/base/core.c:4558 dev_change_name+0x275/0x860 net/core/dev.c:1232 do_setlink+0xa4b/0x41f0 net/core/rtnetlink.c:2864 __rtnl_newlink net/core/rtnetlink.c:3680 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3727 rtnetlink_rcv_msg+0x89b/0x10d0 net/core/rtnetlink.c:6594 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2559 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 page last free pid 5146 tgid 5146 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1110 [inline] free_unref_page+0xd3c/0xec0 mm/page_alloc.c:2617 discard_slab mm/slub.c:2511 [inline] __put_partials+0xeb/0x130 mm/slub.c:2980 put_cpu_partial+0x17c/0x250 mm/slub.c:3055 __slab_free+0x2ea/0x3d0 mm/slub.c:4254 qlink_free mm/kasan/quarantine.c:163 [inline] qlist_free_all+0x9e/0x140 mm/kasan/quarantine.c:179 kasan_quarantine_reduce+0x14f/0x170 mm/kasan/quarantine.c:286 __kasan_slab_alloc+0x23/0x80 mm/kasan/common.c:322 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3888 [inline] slab_alloc_node mm/slub.c:3948 [inline] __do_kmalloc_node mm/slub.c:4068 [inline] __kmalloc_node_noprof+0x1d7/0x450 mm/slub.c:4076 kmalloc_node_noprof include/linux/slab.h:681 [inline] kvmalloc_node_noprof+0x72/0x190 mm/util.c:634 bucket_table_alloc lib/rhashtable.c:186 [inline] rhashtable_rehash_alloc+0x9e/0x290 lib/rhashtable.c:367 rht_deferred_worker+0x4e1/0x2440 lib/rhashtable.c:427 process_one_work kernel/workqueue.c:3218 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3299 worker_thread+0x86d/0xd70 kernel/workqueue.c:3380 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 Memory state around the buggy address: ffff88802cd73c80: 07 fc fc fc 05 fc fc fc 05 fc fc fc fa fc fc fc ffff88802cd73d00: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc >ffff88802cd73d80: fa fc fc fc 01 fc fc fc fa fc fc fc fa fc fc fc ^ ffff88802cd73e00: fa fc fc fc fa fc fc fc 05 fc fc fc 07 fc fc fc ffff88802cd73e80: 07 fc fc fc 07 fc fc fc 07 fc fc fc 07 fc fc fc Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20240404122051.2303764-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 6 ++++++ net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 4 ++++ 4 files changed, 18 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aa23479b20b2a..ed62c1026fe93 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) struct ebt_table_info *newinfo; struct ebt_replace tmp; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; @@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, { struct compat_ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2407066b0fec1..b150c9929b12e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index da5998011ab9b..1f365e28e316c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1110,6 +1110,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1494,6 +1496,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0ce0ed17c7583..37a2b3301e423 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1127,6 +1127,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1503,6 +1505,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; -- GitLab From 8a57544e9285a16dce8e58f470a1b30f426812c4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 1 Nov 2023 11:49:48 +0100 Subject: [PATCH 1500/2290] vboxsf: Avoid an spurious warning if load_nls_xxx() fails commit de3f64b738af57e2732b91a0774facc675b75b54 upstream. If an load_nls_xxx() function fails a few lines above, the 'sbi->bdi_id' is still 0. So, in the error handling path, we will call ida_simple_remove(..., 0) which is not allocated yet. In order to prevent a spurious "ida_free called for id=0 which is not allocated." message, tweak the error handling path and add a new label. Fixes: 0fd169576648 ("fs: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/d09eaaa4e2e08206c58a1a27ca9b3e81dc168773.1698835730.git.christophe.jaillet@wanadoo.fr Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- fs/vboxsf/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index d2f6df69f6110..74952e58cca06 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) if (!sbi->nls) { vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); err = -EINVAL; - goto fail_free; + goto fail_destroy_idr; } } @@ -224,6 +224,7 @@ fail_free: ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); +fail_destroy_idr: idr_destroy(&sbi->ino_idr); kfree(sbi); return err; -- GitLab From a44770fed86515eedb5a7c00b787f847ebb134a5 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 2 Apr 2024 12:46:21 +0200 Subject: [PATCH 1501/2290] bpf, sockmap: Prevent lock inversion deadlock in map delete elem commit ff91059932401894e6c86341915615c5eb0eca48 upstream. syzkaller started using corpuses where a BPF tracing program deletes elements from a sockmap/sockhash map. Because BPF tracing programs can be invoked from any interrupt context, locks taken during a map_delete_elem operation must be hardirq-safe. Otherwise a deadlock due to lock inversion is possible, as reported by lockdep: CPU0 CPU1 ---- ---- lock(&htab->buckets[i].lock); local_irq_disable(); lock(&host->lock); lock(&htab->buckets[i].lock); lock(&host->lock); Locks in sockmap are hardirq-unsafe by design. We expects elements to be deleted from sockmap/sockhash only in task (normal) context with interrupts enabled, or in softirq context. Detect when map_delete_elem operation is invoked from a context which is _not_ hardirq-unsafe, that is interrupts are disabled, and bail out with an error. Note that map updates are not affected by this issue. BPF verifier does not allow updating sockmap/sockhash from a BPF tracing program today. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+bc922f476bd65abbd466@syzkaller.appspotmail.com Reported-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Tested-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com Acked-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=d4066896495db380182e Closes: https://syzkaller.appspot.com/bug?extid=bc922f476bd65abbd466 Link: https://lore.kernel.org/bpf/20240402104621.1050319-1-jakub@cloudflare.com Signed-off-by: Greg Kroah-Hartman --- net/core/sock_map.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 91140bc0541f3..aa7ff6a464291 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -413,6 +413,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock *sk; int err = 0; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + raw_spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) @@ -926,6 +929,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_elem *elem; int ret = -ENOENT; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); -- GitLab From 55d3fe7b2b7bc354e7cbc1f7b8f98a29ccd5a366 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Apr 2024 13:09:08 +0000 Subject: [PATCH 1502/2290] net/sched: act_skbmod: prevent kernel-infoleak commit d313eb8b77557a6d5855f42d2234bd592c7b50dd upstream. syzbot found that tcf_skbmod_dump() was copying four bytes from kernel stack to user space [1]. The issue here is that 'struct tc_skbmod' has a four bytes hole. We need to clear the structure before filling fields. [1] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x366/0x2520 lib/iov_iter.c:185 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x366/0x2520 lib/iov_iter.c:185 copy_to_iter include/linux/uio.h:196 [inline] simple_copy_to_iter net/core/datagram.c:532 [inline] __skb_datagram_iter+0x185/0x1000 net/core/datagram.c:420 skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:4050 [inline] netlink_recvmsg+0x432/0x1610 net/netlink/af_netlink.c:1962 sock_recvmsg_nosec net/socket.c:1046 [inline] sock_recvmsg+0x2c4/0x340 net/socket.c:1068 __sys_recvfrom+0x35a/0x5f0 net/socket.c:2242 __do_sys_recvfrom net/socket.c:2260 [inline] __se_sys_recvfrom net/socket.c:2256 [inline] __x64_sys_recvfrom+0x126/0x1d0 net/socket.c:2256 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was stored to memory at: pskb_expand_head+0x30f/0x19d0 net/core/skbuff.c:2253 netlink_trim+0x2c2/0x330 net/netlink/af_netlink.c:1317 netlink_unicast+0x9f/0x1260 net/netlink/af_netlink.c:1351 nlmsg_unicast include/net/netlink.h:1144 [inline] nlmsg_notify+0x21d/0x2f0 net/netlink/af_netlink.c:2610 rtnetlink_send+0x73/0x90 net/core/rtnetlink.c:741 rtnetlink_maybe_send include/linux/rtnetlink.h:17 [inline] tcf_add_notify net/sched/act_api.c:2048 [inline] tcf_action_add net/sched/act_api.c:2071 [inline] tc_ctl_action+0x146e/0x19d0 net/sched/act_api.c:2119 rtnetlink_rcv_msg+0x1737/0x1900 net/core/rtnetlink.c:6595 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2559 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6613 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0xf4c/0x1260 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x10df/0x11f0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 ____sys_sendmsg+0x877/0xb60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x4a0 net/socket.c:2674 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was stored to memory at: __nla_put lib/nlattr.c:1041 [inline] nla_put+0x1c6/0x230 lib/nlattr.c:1099 tcf_skbmod_dump+0x23f/0xc20 net/sched/act_skbmod.c:256 tcf_action_dump_old net/sched/act_api.c:1191 [inline] tcf_action_dump_1+0x85e/0x970 net/sched/act_api.c:1227 tcf_action_dump+0x1fd/0x460 net/sched/act_api.c:1251 tca_get_fill+0x519/0x7a0 net/sched/act_api.c:1628 tcf_add_notify_msg net/sched/act_api.c:2023 [inline] tcf_add_notify net/sched/act_api.c:2042 [inline] tcf_action_add net/sched/act_api.c:2071 [inline] tc_ctl_action+0x1365/0x19d0 net/sched/act_api.c:2119 rtnetlink_rcv_msg+0x1737/0x1900 net/core/rtnetlink.c:6595 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2559 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6613 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0xf4c/0x1260 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x10df/0x11f0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 ____sys_sendmsg+0x877/0xb60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x4a0 net/socket.c:2674 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Local variable opt created at: tcf_skbmod_dump+0x9d/0xc20 net/sched/act_skbmod.c:244 tcf_action_dump_old net/sched/act_api.c:1191 [inline] tcf_action_dump_1+0x85e/0x970 net/sched/act_api.c:1227 Bytes 188-191 of 248 are uninitialized Memory access of size 248 starts at ffff888117697680 Data copied to user address 00007ffe56d855f0 Fixes: 86da71b57383 ("net_sched: Introduce skbmod action") Signed-off-by: Eric Dumazet Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240403130908.93421-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/act_skbmod.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d98758a639340..744ff9729469e 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -239,13 +239,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; - struct tc_skbmod opt = { - .index = d->tcf_index, - .refcnt = refcount_read(&d->tcf_refcnt) - ref, - .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - }; + struct tc_skbmod opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + opt.index = d->tcf_index; + opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, -- GitLab From b7d1ce2cc7192e8a037faa3f5d3ba72c25976460 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Apr 2024 13:41:33 +0000 Subject: [PATCH 1503/2290] net/sched: fix lockdep splat in qdisc_tree_reduce_backlog() commit 7eb322360b0266481e560d1807ee79e0cef5742b upstream. qdisc_tree_reduce_backlog() is called with the qdisc lock held, not RTNL. We must use qdisc_lookup_rcu() instead of qdisc_lookup() syzbot reported: WARNING: suspicious RCU usage 6.1.74-syzkaller #0 Not tainted ----------------------------- net/sched/sch_api.c:305 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by udevd/1142: #0: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:306 [inline] #0: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:747 [inline] #0: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: net_tx_action+0x64a/0x970 net/core/dev.c:5282 #1: ffff888171861108 (&sch->q.lock){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:350 [inline] #1: ffff888171861108 (&sch->q.lock){+.-.}-{2:2}, at: net_tx_action+0x754/0x970 net/core/dev.c:5297 #2: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:306 [inline] #2: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:747 [inline] #2: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: qdisc_tree_reduce_backlog+0x84/0x580 net/sched/sch_api.c:792 stack backtrace: CPU: 1 PID: 1142 Comm: udevd Not tainted 6.1.74-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: [] __dump_stack lib/dump_stack.c:88 [inline] [] dump_stack_lvl+0x1b1/0x28f lib/dump_stack.c:106 [] dump_stack+0x15/0x1e lib/dump_stack.c:113 [] lockdep_rcu_suspicious+0x1b9/0x260 kernel/locking/lockdep.c:6592 [] qdisc_lookup+0xac/0x6f0 net/sched/sch_api.c:305 [] qdisc_tree_reduce_backlog+0x243/0x580 net/sched/sch_api.c:811 [] pfifo_tail_enqueue+0x32c/0x4b0 net/sched/sch_fifo.c:51 [] qdisc_enqueue include/net/sch_generic.h:833 [inline] [] netem_dequeue+0xeb3/0x15d0 net/sched/sch_netem.c:723 [] dequeue_skb net/sched/sch_generic.c:292 [inline] [] qdisc_restart net/sched/sch_generic.c:397 [inline] [] __qdisc_run+0x249/0x1e60 net/sched/sch_generic.c:415 [] qdisc_run+0xd6/0x260 include/net/pkt_sched.h:125 [] net_tx_action+0x7c9/0x970 net/core/dev.c:5313 [] __do_softirq+0x2bd/0x9bd kernel/softirq.c:616 [] invoke_softirq kernel/softirq.c:447 [inline] [] __irq_exit_rcu+0xca/0x230 kernel/softirq.c:700 [] irq_exit_rcu+0x9/0x20 kernel/softirq.c:712 [] sysvec_apic_timer_interrupt+0x42/0x90 arch/x86/kernel/apic/apic.c:1107 [] asm_sysvec_apic_timer_interrupt+0x1b/0x20 arch/x86/include/asm/idtentry.h:656 Fixes: d636fc5dd692 ("net: sched: add rcu annotations around qdisc->qdisc_sleeping") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240402134133.2352776-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e8f988e1c7e64..334a563e0bc14 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -806,7 +806,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) notify = !sch->q.qlen && !WARN_ON_ONCE(!n && !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ - sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); + sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON_ONCE(parentid != TC_H_ROOT); break; -- GitLab From e01835f3a1bd640e0f4b9e3b62fcfa7889c5d85a Mon Sep 17 00:00:00 2001 From: Piotr Wejman Date: Mon, 1 Apr 2024 21:22:39 +0200 Subject: [PATCH 1504/2290] net: stmmac: fix rx queue priority assignment commit b3da86d432b7cd65b025a11f68613e333d2483db upstream. The driver should ensure that same priority is not mapped to multiple rx queues. From DesignWare Cores Ethernet Quality-of-Service Databook, section 17.1.29 MAC_RxQ_Ctrl2: "[...]The software must ensure that the content of this field is mutually exclusive to the PSRQ fields for other queues, that is, the same priority is not mapped to multiple Rx queues[...]" Previously rx_queue_priority() function was: - clearing all priorities from a queue - adding new priorities to that queue After this patch it will: - first assign new priorities to a queue - then remove those priorities from all other queues - keep other priorities previously assigned to that queue Fixes: a8f5102af2a7 ("net: stmmac: TX and RX queue priority configuration") Fixes: 2142754f8b9c ("net: stmmac: Add MAC related callbacks for XGMAC2") Signed-off-by: Piotr Wejman Link: https://lore.kernel.org/r/20240401192239.33942-1-piotrwejman90@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 40 ++++++++++++++----- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 38 ++++++++++++++---- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 84276eb681d70..39112d5cb5b80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -87,19 +87,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 base_register; - u32 value; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; - if (queue >= 4) - queue -= 4; + ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); - value = readl(ioaddr + base_register); + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & + GMAC_RXQCTRL_PSRQX_MASK(i)); - value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); - value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & GMAC_RXQCTRL_PSRQX_MASK(queue); - writel(value, ioaddr + base_register); + + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + } else { + queue -= 4; + + ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); + + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + } } static void dwmac4_tx_queue_priority(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index ec1616ffbfa7a..dd73f38ec08d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -97,17 +97,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 value, reg; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & + XGMAC_PSRQ(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); + + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + reg); - value &= ~XGMAC_PSRQ(queue); - value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); + ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); - writel(value, ioaddr + reg); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + } } static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, -- GitLab From 77f5e52d7beedd92ca9264eaa0f2f802dde347a3 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 2 Apr 2024 09:16:34 +0200 Subject: [PATCH 1505/2290] net: phy: micrel: lan8814: Fix when enabling/disabling 1-step timestamping commit de99e1ea3a35f23ff83a31d6b08f43d27b2c6345 upstream. There are 2 issues with the blamed commit. 1. When the phy is initialized, it would enable the disabled of UDPv4 checksums. The UDPv6 checksum is already enabled by default. So when 1-step is configured then it would clear these flags. 2. After the 1-step is configured, then if 2-step is configured then the 1-step would be still configured because it is not clearing the flag. So the sync frames will still have origin timestamps set. Fix this by reading first the value of the register and then just change bit 12 as this one determines if the timestamp needs to be inserted in the frame, without changing any other bits. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Reviewed-by: Divya Koppera Link: https://lore.kernel.org/r/20240402071634.2483524-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9481f172830f5..e67bffd6f4238 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2188,6 +2188,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) struct hwtstamp_config config; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; + int tx_mod; if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -2237,9 +2238,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); - if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); + if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, - PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, + tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } if (config.rx_filter != HWTSTAMP_FILTER_NONE) lan8814_config_ts_intr(ptp_priv->phydev, true); -- GitLab From 10608161696c2768f53426642f78a42bcaaa53e8 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Fri, 29 Mar 2024 09:16:31 +0300 Subject: [PATCH 1506/2290] net: phy: micrel: Fix potential null pointer dereference commit 96c155943a703f0655c0c4cab540f67055960e91 upstream. In lan8814_get_sig_rx() and lan8814_get_sig_tx() ptp_parse_header() may return NULL as ptp_header due to abnormal packet type or corrupted packet. Fix this bug by adding ptp_header check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Aleksandr Mishin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240329061631.33199-1-amishin@t-argos.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e67bffd6f4238..2cbb1d1830bbd 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2303,7 +2303,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, } } -static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2313,7 +2313,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ptp_header = ptp_parse_header(skb, type); skb_pull_inline(skb, ETH_HLEN); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static bool lan8814_match_rx_ts(struct kszphy_ptp_priv *ptp_priv, @@ -2325,7 +2329,8 @@ static bool lan8814_match_rx_ts(struct kszphy_ptp_priv *ptp_priv, bool ret = false; u16 skb_sig; - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + return ret; /* Iterate over all RX timestamps and match it with the received skbs */ spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); @@ -2605,7 +2610,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) return 0; } -static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2613,7 +2618,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) type = ptp_classify_raw(skb); ptp_header = ptp_parse_header(skb, type); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static void lan8814_dequeue_tx_skb(struct kszphy_ptp_priv *ptp_priv) @@ -2631,7 +2640,8 @@ static void lan8814_dequeue_tx_skb(struct kszphy_ptp_priv *ptp_priv) spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { - lan8814_get_sig_tx(skb, &skb_sig); + if (!lan8814_get_sig_tx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) continue; @@ -2675,7 +2685,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) continue; -- GitLab From 66cb6659008b7b723ee558ff6df9e5459360b28d Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:34:02 +0100 Subject: [PATCH 1507/2290] selftests: net: gro fwd: update vxlan GRO test expectations commit 0fb101be97ca27850c5ecdbd1269423ce4d1f607 upstream. UDP tunnel packets can't be GRO in-between their endpoints as this causes different issues. The UDP GRO fwd vxlan tests were relying on this and their expectations have to be fixed. We keep both vxlan tests and expected no GRO from happening. The vxlan UDP GRO bench test was removed as it's not providing any valuable information now. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/udpgro_fwd.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 9690a5d7ffd7d..9e9b4644e0ea3 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -239,7 +239,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -252,13 +252,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done -- GitLab From 2eeab8c47c3c0276e0746bc382f405c9a236a5ad Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:33:59 +0100 Subject: [PATCH 1508/2290] gro: fix ownership transfer commit ed4cccef64c1d0d5b91e69f7a8a6697c3a865486 upstream. If packets are GROed with fraglist they might be segmented later on and continue their journey in the stack. In skb_segment_list those skbs can be reused as-is. This is an issue as their destructor was removed in skb_gro_receive_list but not the reference to their socket, and then they can't be orphaned. Fix this by also removing the reference to the socket. For example this could be observed, kernel BUG at include/linux/skbuff.h:3131! (skb_orphan) RIP: 0010:ip6_rcv_core+0x11bc/0x19a0 Call Trace: ipv6_list_rcv+0x250/0x3f0 __netif_receive_skb_list_core+0x49d/0x8f0 netif_receive_skb_list_internal+0x634/0xd40 napi_complete_done+0x1d2/0x7d0 gro_cell_poll+0x118/0x1f0 A similar construction is found in skb_gro_receive, apply the same change there. Fixes: 5e10da5385d2 ("skbuff: allow 'slow_gro' for skb carring sock reference") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gro.c | 3 ++- net/ipv4/udp_offload.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index 352f966cb1dac..47118e97ecfdd 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -252,8 +252,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) } merge: - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8096576fd9bde..b9e638f76753c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -441,8 +441,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->count++; p->data_len += skb->len; - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; -- GitLab From 23178ec5abbdecfdd67eca17f3494f7c6ca0d925 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 28 Mar 2024 13:59:05 +0100 Subject: [PATCH 1509/2290] x86/bugs: Fix the SRSO mitigation on Zen3/4 commit 4535e1a4174c4111d92c5a9a21e542d232e0fcaa upstream. The original version of the mitigation would patch in the calls to the untraining routines directly. That is, the alternative() in UNTRAIN_RET will patch in the CALL to srso_alias_untrain_ret() directly. However, even if commit e7c25c441e9e ("x86/cpu: Cleanup the untrain mess") meant well in trying to clean up the situation, due to micro- architectural reasons, the untraining routine srso_alias_untrain_ret() must be the target of a CALL instruction and not of a JMP instruction as it is done now. Reshuffle the alternative macros to accomplish that. Fixes: e7c25c441e9e ("x86/cpu: Cleanup the untrain mess") Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ingo Molnar Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 1 + arch/x86/include/asm/nospec-branch.h | 20 ++++++++++++++------ arch/x86/lib/retpoline.S | 4 +--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8f80de627c60a..5cdccea455544 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8f6f17a8617b6..47e4e06a47d76 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -167,11 +167,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -188,9 +197,8 @@ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + CALL_UNTRAIN_RET + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 65c5c44f006bc..24c850e1e239f 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -252,9 +252,7 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE "jmp retbleed_untrain_ret", "jmp srso_untrain_ret", X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -- GitLab From ed37bdaee64b51faaee55d2dfce5b702ace7bab0 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 2 Apr 2024 16:05:49 +0200 Subject: [PATCH 1510/2290] x86/retpoline: Do the necessary fixup to the Zen3/4 srso return thunk for !SRSO commit 0e110732473e14d6520e49d75d2c88ef7d46fe67 upstream. The srso_alias_untrain_ret() dummy thunk in the !CONFIG_MITIGATION_SRSO case is there only for the altenative in CALL_UNTRAIN_RET to have a symbol to resolve. However, testing with kernels which don't have CONFIG_MITIGATION_SRSO enabled, leads to the warning in patch_return() to fire: missing return thunk: srso_alias_untrain_ret+0x0/0x10-0x0: eb 0e 66 66 2e WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:826 apply_returns (arch/x86/kernel/alternative.c:826 Put in a plain "ret" there so that gcc doesn't put a return thunk in in its place which special and gets checked. In addition: ERROR: modpost: "srso_alias_untrain_ret" [arch/x86/kvm/kvm-amd.ko] undefined! make[2]: *** [scripts/Makefile.modpost:145: Module.symvers] Chyba 1 make[1]: *** [/usr/src/linux-6.8.3/Makefile:1873: modpost] Chyba 2 make: *** [Makefile:240: __sub-make] Chyba 2 since !SRSO builds would use the dummy return thunk as reported by petr.pisar@atlas.cz, https://bugzilla.kernel.org/show_bug.cgi?id=218679. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202404020901.da75a60f-oliver.sang@intel.com Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/all/202404020901.da75a60f-oliver.sang@intel.com/ Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 24c850e1e239f..a96e816e5ccd7 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -110,6 +110,7 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ret int3 SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #endif SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) -- GitLab From 1829b618ccc42c6740f0b91e9251d7dddc2b4108 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 29 Mar 2024 11:06:37 -0700 Subject: [PATCH 1511/2290] i40e: Fix VF MAC filter removal commit ea2a1cfc3b2019bdea6324acd3c03606b60d71ad upstream. Commit 73d9629e1c8c ("i40e: Do not allow untrusted VF to remove administratively set MAC") fixed an issue where untrusted VF was allowed to remove its own MAC address although this was assigned administratively from PF. Unfortunately the introduced check is wrong because it causes that MAC filters for other MAC addresses including multi-cast ones are not removed. if (ether_addr_equal(addr, vf->default_lan_addr.addr) && i40e_can_vf_change_mac(vf)) was_unimac_deleted = true; else continue; if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ... The else path with `continue` effectively skips any MAC filter removal except one for primary MAC addr when VF is allowed to do so. Fix the check condition so the `continue` is only done for primary MAC address. Fixes: 73d9629e1c8c ("i40e: Do not allow untrusted VF to remove administratively set MAC") Signed-off-by: Ivan Vecera Reviewed-by: Michal Schmidt Reviewed-by: Brett Creeley Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240329180638.211412-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index ed4be80fec2a5..99cab36fb1075 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3078,11 +3078,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) /* Allow to delete VF primary MAC only if it was not set * administratively by PF or if VF is trusted. */ - if (ether_addr_equal(addr, vf->default_lan_addr.addr) && - i40e_can_vf_change_mac(vf)) - was_unimac_deleted = true; - else - continue; + if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { + if (i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + } if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; -- GitLab From 1db7fcb2b290c47c202b79528824f119fa28937d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Mar 2024 11:22:48 +0000 Subject: [PATCH 1512/2290] erspan: make sure erspan_base_hdr is present in skb->head commit 17af420545a750f763025149fa7b833a4fc8b8f0 upstream. syzbot reported a problem in ip6erspan_rcv() [1] Issue is that ip6erspan_rcv() (and erspan_rcv()) no longer make sure erspan_base_hdr is present in skb linear part (skb->head) before getting @ver field from it. Add the missing pskb_may_pull() calls. v2: Reload iph pointer in erspan_rcv() after pskb_may_pull() because skb->head might have changed. [1] BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2742 [inline] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2756 [inline] BUG: KMSAN: uninit-value in ip6erspan_rcv net/ipv6/ip6_gre.c:541 [inline] BUG: KMSAN: uninit-value in gre_rcv+0x11f8/0x1930 net/ipv6/ip6_gre.c:610 pskb_may_pull_reason include/linux/skbuff.h:2742 [inline] pskb_may_pull include/linux/skbuff.h:2756 [inline] ip6erspan_rcv net/ipv6/ip6_gre.c:541 [inline] gre_rcv+0x11f8/0x1930 net/ipv6/ip6_gre.c:610 ip6_protocol_deliver_rcu+0x1d4c/0x2ca0 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:460 [inline] ip6_rcv_finish+0x955/0x970 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:314 [inline] ipv6_rcv+0xde/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5538 [inline] __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5652 netif_receive_skb_internal net/core/dev.c:5738 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5798 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1549 tun_get_user+0x5566/0x69e0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2108 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb63/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xe0 fs/read_write.c:652 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was created at: slab_post_alloc_hook mm/slub.c:3804 [inline] slab_alloc_node mm/slub.c:3845 [inline] kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577 __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1318 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795 tun_alloc_skb drivers/net/tun.c:1525 [inline] tun_get_user+0x209a/0x69e0 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2108 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb63/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xe0 fs/read_write.c:652 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 CPU: 1 PID: 5045 Comm: syz-executor114 Not tainted 6.9.0-rc1-syzkaller-00021-g962490525cff #0 Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup") Reported-by: syzbot+1c1cf138518bf0c53d68@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/000000000000772f2c0614b66ef7@google.com/ Signed-off-by: Eric Dumazet Cc: Lorenzo Bianconi Link: https://lore.kernel.org/r/20240328112248.1101491-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 5 +++++ net/ipv6/ip6_gre.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d67d026d7f975..0267fa05374aa 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->flags | TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); } else { + if (unlikely(!pskb_may_pull(skb, + gre_hdr_len + sizeof(*ershdr)))) + return PACKET_REJECT; + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d3fba7d8dec4e..b3e2d658af809 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; -- GitLab From 2febb7eeb493dd31ee05dfd307a4f0e61ac183ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Mar 2024 09:05:59 -0700 Subject: [PATCH 1513/2290] selftests: reuseaddr_conflict: add missing new line at the end of the output commit 31974122cfdeaf56abc18d8ab740d580d9833e90 upstream. The netdev CI runs in a VM and captures serial, so stdout and stderr get combined. Because there's a missing new line in stderr the test ends up corrupting KTAP: # Successok 1 selftests: net: reuseaddr_conflict which should have been: # Success ok 1 selftests: net: reuseaddr_conflict Fixes: 422d8dc6fd3a ("selftest: add a reuseaddr test") Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240329160559.249476-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/reuseaddr_conflict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b03b..bfb07dc495186 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } -- GitLab From 167d4b47a9bdcb01541dfa29e9f3cbb8edd3dfd2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 1 Apr 2024 14:10:04 -0700 Subject: [PATCH 1514/2290] ipv6: Fix infinite recursion in fib6_dump_done(). commit d21d40605bca7bd5fc23ef03d4c1ca1f48bc2cae upstream. syzkaller reported infinite recursive calls of fib6_dump_done() during netlink socket destruction. [1] From the log, syzkaller sent an AF_UNSPEC RTM_GETROUTE message, and then the response was generated. The following recvmmsg() resumed the dump for IPv6, but the first call of inet6_dump_fib() failed at kzalloc() due to the fault injection. [0] 12:01:34 executing program 3: r0 = socket$nl_route(0x10, 0x3, 0x0) sendmsg$nl_route(r0, ... snip ...) recvmmsg(r0, ... snip ...) (fail_nth: 8) Here, fib6_dump_done() was set to nlk_sk(sk)->cb.done, and the next call of inet6_dump_fib() set it to nlk_sk(sk)->cb.args[3]. syzkaller stopped receiving the response halfway through, and finally netlink_sock_destruct() called nlk_sk(sk)->cb.done(). fib6_dump_done() calls fib6_dump_end() and nlk_sk(sk)->cb.done() if it is still not NULL. fib6_dump_end() rewrites nlk_sk(sk)->cb.done() by nlk_sk(sk)->cb.args[3], but it has the same function, not NULL, calling itself recursively and hitting the stack guard page. To avoid the issue, let's set the destructor after kzalloc(). [0]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 1 PID: 432110 Comm: syz-executor.3 Not tainted 6.8.0-12821-g537c2e91d354-dirty #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:117) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3733) kmalloc_trace (mm/slub.c:3748 mm/slub.c:3827 mm/slub.c:3992) inet6_dump_fib (./include/linux/slab.h:628 ./include/linux/slab.h:749 net/ipv6/ip6_fib.c:662) rtnl_dump_all (net/core/rtnetlink.c:4029) netlink_dump (net/netlink/af_netlink.c:2269) netlink_recvmsg (net/netlink/af_netlink.c:1988) ____sys_recvmsg (net/socket.c:1046 net/socket.c:2801) ___sys_recvmsg (net/socket.c:2846) do_recvmmsg (net/socket.c:2943) __x64_sys_recvmmsg (net/socket.c:3041 net/socket.c:3034 net/socket.c:3034) [1]: BUG: TASK stack guard page was hit at 00000000f2fa9af1 (stack is 00000000b7912430..000000009a436beb) stack guard page: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 223719 Comm: kworker/1:3 Not tainted 6.8.0-12821-g537c2e91d354-dirty #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: events netlink_sock_destruct_work RIP: 0010:fib6_dump_done (net/ipv6/ip6_fib.c:570) Code: 3c 24 e8 f3 e9 51 fd e9 28 fd ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 41 57 41 56 41 55 41 54 55 48 89 fd <53> 48 8d 5d 60 e8 b6 4d 07 fd 48 89 da 48 b8 00 00 00 00 00 fc ff RSP: 0018:ffffc9000d980000 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffffffff84405990 RCX: ffffffff844059d3 RDX: ffff8881028e0000 RSI: ffffffff84405ac2 RDI: ffff88810c02f358 RBP: ffff88810c02f358 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000224 R12: 0000000000000000 R13: ffff888007c82c78 R14: ffff888007c82c68 R15: ffff888007c82c68 FS: 0000000000000000(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc9000d97fff8 CR3: 0000000102309002 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: <#DF> fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) ... fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) netlink_sock_destruct (net/netlink/af_netlink.c:401) __sk_destruct (net/core/sock.c:2177 (discriminator 2)) sk_destruct (net/core/sock.c:2224) __sk_free (net/core/sock.c:2235) sk_free (net/core/sock.c:2246) process_one_work (kernel/workqueue.c:3259) worker_thread (kernel/workqueue.c:3329 kernel/workqueue.c:3416) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:256) Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240401211003.25274-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1840735e9cb07..e606374854ce5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -646,19 +646,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (!w) { /* New dump: * - * 1. hook callback destructor. - */ - cb->args[3] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. + * 1. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; + + /* 2. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + } arg.skb = skb; -- GitLab From 80247e0eca14ff177d565f58ecd3010f6b7910a4 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 25 Mar 2024 17:09:29 -0400 Subject: [PATCH 1515/2290] mlxbf_gige: stop interface during shutdown commit 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 upstream. The mlxbf_gige driver intermittantly encounters a NULL pointer exception while the system is shutting down via "reboot" command. The mlxbf_driver will experience an exception right after executing its shutdown() method. One example of this exception is: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000070 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000011d373000 [0000000000000070] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] SMP CPU: 0 PID: 13 Comm: ksoftirqd/0 Tainted: G S OE 5.15.0-bf.6.gef6992a #1 Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.0.2.12669 Apr 21 2023 pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige] lr : mlxbf_gige_poll+0x54/0x160 [mlxbf_gige] sp : ffff8000080d3c10 x29: ffff8000080d3c10 x28: ffffcce72cbb7000 x27: ffff8000080d3d58 x26: ffff0000814e7340 x25: ffff331cd1a05000 x24: ffffcce72c4ea008 x23: ffff0000814e4b40 x22: ffff0000814e4d10 x21: ffff0000814e4128 x20: 0000000000000000 x19: ffff0000814e4a80 x18: ffffffffffffffff x17: 000000000000001c x16: ffffcce72b4553f4 x15: ffff80008805b8a7 x14: 0000000000000000 x13: 0000000000000030 x12: 0101010101010101 x11: 7f7f7f7f7f7f7f7f x10: c2ac898b17576267 x9 : ffffcce720fa5404 x8 : ffff000080812138 x7 : 0000000000002e9a x6 : 0000000000000080 x5 : ffff00008de3b000 x4 : 0000000000000000 x3 : 0000000000000001 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige] mlxbf_gige_poll+0x54/0x160 [mlxbf_gige] __napi_poll+0x40/0x1c8 net_rx_action+0x314/0x3a0 __do_softirq+0x128/0x334 run_ksoftirqd+0x54/0x6c smpboot_thread_fn+0x14c/0x190 kthread+0x10c/0x110 ret_from_fork+0x10/0x20 Code: 8b070000 f9000ea0 f95056c0 f86178a1 (b9407002) ---[ end trace 7cc3941aa0d8e6a4 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt Kernel Offset: 0x4ce722520000 from 0xffff800008000000 PHYS_OFFSET: 0x80000000 CPU features: 0x000005c1,a3330e5a Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- During system shutdown, the mlxbf_gige driver's shutdown() is always executed. However, the driver's stop() method will only execute if networking interface configuration logic within the Linux distribution has been setup to do so. If shutdown() executes but stop() does not execute, NAPI remains enabled and this can lead to an exception if NAPI is scheduled while the hardware interface has only been partially deinitialized. The networking interface managed by the mlxbf_gige driver must be properly stopped during system shutdown so that IFF_UP is cleared, the hardware interface is put into a clean state, and NAPI is fully deinitialized. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20240325210929.25362-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 65e92541db6e5..d6b4d163bbbfd 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "mlxbf_gige.h" @@ -417,8 +418,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev) { struct mlxbf_gige *priv = platform_get_drvdata(pdev); - writeq(0, priv->base + MLXBF_GIGE_INT_EN); - mlxbf_gige_clean_port(priv); + rtnl_lock(); + netif_device_detach(priv->netdev); + + if (netif_running(priv->netdev)) + dev_close(priv->netdev); + + rtnl_unlock(); } static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { -- GitLab From fd6692e9b5edc46a0751e5c9218f1ee7898718c5 Mon Sep 17 00:00:00 2001 From: Atlas Yu Date: Thu, 28 Mar 2024 13:51:52 +0800 Subject: [PATCH 1516/2290] r8169: skip DASH fw status checks when DASH is disabled commit 5e864d90b20803edf6bd44a99fb9afa7171785f2 upstream. On devices that support DASH, the current code in the "rtl_loop_wait" function raises false alarms when DASH is disabled. This occurs because the function attempts to wait for the DASH firmware to be ready, even though it's not relevant in this case. r8169 0000:0c:00.0 eth0: RTL8168ep/8111ep, 38:7c:76:49:08:d9, XID 502, IRQ 86 r8169 0000:0c:00.0 eth0: jumbo features [frames: 9194 bytes, tx checksumming: ko] r8169 0000:0c:00.0 eth0: DASH disabled ... r8169 0000:0c:00.0 eth0: rtl_ep_ocp_read_cond == 0 (loop: 30, delay: 10000). This patch modifies the driver start/stop functions to skip checking the DASH firmware status when DASH is explicitly disabled. This prevents unnecessary delays and false alarms. The patch has been tested on several ThinkStation P8/PX workstations. Fixes: 0ab0c45d8aae ("r8169: add handling DASH when DASH is disabled") Signed-off-by: Atlas Yu Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20240328055152.18443-1-atlas.yu@canonical.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 31 ++++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 85ed9879af6ca..3dbcb311dcbf2 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1135,17 +1135,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); } +static void rtl_dash_loop_wait(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long usecs, int n, bool high) +{ + if (!tp->dash_enabled) + return; + rtl_loop_wait(tp, c, usecs, n, high); +} + +static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, true); +} + +static void rtl_dash_loop_wait_low(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, false); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); - rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_start(struct rtl8169_private *tp) { r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); } static void rtl8168_driver_start(struct rtl8169_private *tp) @@ -1159,7 +1182,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp) static void rtl8168dp_driver_stop(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); - rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_stop(struct rtl8169_private *tp) @@ -1167,7 +1190,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) rtl8168ep_stop_cmac(tp); r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); } static void rtl8168_driver_stop(struct rtl8169_private *tp) -- GitLab From d12245080cb259d82b34699f6cd4ec11bdb688bd Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:33:58 +0100 Subject: [PATCH 1517/2290] udp: do not accept non-tunnel GSO skbs landing in a tunnel commit 3d010c8031e39f5fa1e8b13ada77e0321091011f upstream. When rx-udp-gro-forwarding is enabled UDP packets might be GROed when being forwarded. If such packets might land in a tunnel this can cause various issues and udp_gro_receive makes sure this isn't the case by looking for a matching socket. This is performed in udp4/6_gro_lookup_skb but only in the current netns. This is an issue with tunneled packets when the endpoint is in another netns. In such cases the packets will be GROed at the UDP level, which leads to various issues later on. The same thing can happen with rx-gro-list. We saw this with geneve packets being GROed at the UDP level. In such case gso_size is set; later the packet goes through the geneve rx path, the geneve header is pulled, the offset are adjusted and frag_list skbs are not adjusted with regard to geneve. When those skbs hit skb_fragment, it will misbehave. Different outcomes are possible depending on what the GROed skbs look like; from corrupted packets to kernel crashes. One example is a BUG_ON[1] triggered in skb_segment while processing the frag_list. Because gso_size is wrong (geneve header was pulled) skb_segment thinks there is "geneve header size" of data in frag_list, although it's in fact the next packet. The BUG_ON itself has nothing to do with the issue. This is only one of the potential issues. Looking up for a matching socket in udp_gro_receive is fragile: the lookup could be extended to all netns (not speaking about performances) but nothing prevents those packets from being modified in between and we could still not find a matching socket. It's OK to keep the current logic there as it should cover most cases but we also need to make sure we handle tunnel packets being GROed too early. This is done by extending the checks in udp_unexpected_gso: GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits and landing in a tunnel must be segmented. [1] kernel BUG at net/core/skbuff.c:4408! RIP: 0010:skb_segment+0xd2a/0xf70 __udp_gso_segment+0xaa/0x560 Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/udp.h | 28 ++++++++++++++++++++++++++++ net/ipv4/udp.c | 7 +++++++ net/ipv4/udp_offload.c | 6 ++++-- net/ipv6/udp.c | 2 +- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/include/linux/udp.h b/include/linux/udp.h index efd9ab6df3797..79a4eae6f1f8f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -140,6 +140,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -153,6 +171,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7856b7a3e0ee9..2a78c78186c37 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -603,6 +603,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b9e638f76753c..be3ee7cfa9550 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -544,8 +544,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c2c02dea6c386..1775e9b9b85ad 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -476,7 +476,7 @@ csum_copy_err: goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); -- GitLab From 8c58d384050b99d0aac8dd4c5a0d1c09f0f33152 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:34:00 +0100 Subject: [PATCH 1518/2290] udp: do not transition UDP GRO fraglist partial checksums to unnecessary commit f0b8c30345565344df2e33a8417a27503589247d upstream. UDP GRO validates checksums and in udp4/6_gro_complete fraglist packets are converted to CHECKSUM_UNNECESSARY to avoid later checks. However this is an issue for CHECKSUM_PARTIAL packets as they can be looped in an egress path and then their partial checksums are not fixed. Different issues can be observed, from invalid checksum on packets to traces like: gen01: hw csum failure skb len=3008 headroom=160 headlen=1376 tailroom=0 mac=(106,14) net=(120,40) trans=160 shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) csum(0xffff232e ip_summed=2 complete_sw=0 valid=0 level=0) hash(0x77e3d716 sw=1 l4=1) proto=0x86dd pkttype=0 iif=12 ... Fix this by only converting CHECKSUM_NONE packets to CHECKSUM_UNNECESSARY by reusing __skb_incr_checksum_unnecessary. All other checksum types are kept as-is, including CHECKSUM_COMPLETE as fraglist packets being segmented back would have their skb->csum valid. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_offload.c | 8 +------- net/ipv6/udp_offload.c | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index be3ee7cfa9550..de58415c1f97f 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -710,13 +710,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 7720d04ed396d..b98c4c8d8e274 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -169,13 +169,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } -- GitLab From 940ff35ae8d19dc820c3127d39ed391f8fe70b9e Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:34:01 +0100 Subject: [PATCH 1519/2290] udp: prevent local UDP tunnel packets from being GROed commit 64235eabc4b5b18c507c08a1f16cdac6c5661220 upstream. GRO has a fundamental issue with UDP tunnel packets as it can't detect those in a foolproof way and GRO could happen before they reach the tunnel endpoint. Previous commits have fixed issues when UDP tunnel packets come from a remote host, but if those packets are issued locally they could run into checksum issues. If the inner packet has a partial checksum the information will be lost in the GRO logic, either in udp4/6_gro_complete or in udp_gro_complete_segment and packets will have an invalid checksum when leaving the host. Prevent local UDP tunnel packets from ever being GROed at the outer UDP level. Due to skb->encapsulation being wrongly used in some drivers this is actually only preventing UDP tunnel packets with a partial checksum to be GROed (see iptunnel_handle_offloads) but those were also the packets triggering issues so in practice this should be sufficient. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets") Suggested-by: Paolo Abeni Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_offload.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index de58415c1f97f..84b7d6089f76c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -551,6 +551,12 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { + /* If the packet was locally encapsulated in a UDP tunnel that + * wasn't detected above, do not GRO. + */ + if (skb->encapsulation) + goto out; + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; -- GitLab From 39efe5b6f6114247ea72fa8c1ed2faaead784cfe Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 26 Mar 2024 17:51:49 +0530 Subject: [PATCH 1520/2290] octeontx2-af: Fix issue with loading coalesced KPU profiles commit 0ba80d96585662299d4ea4624043759ce9015421 upstream. The current implementation for loading coalesced KPU profiles has a limitation. The "offset" field, which is used to locate profiles within the profile is restricted to a u16. This restricts the number of profiles that can be loaded. This patch addresses this limitation by increasing the size of the "offset" field. Fixes: 11c730bfbf5b ("octeontx2-af: support for coalescing KPU profiles") Signed-off-by: Hariprasad Kelam Reviewed-by: Kalesh AP Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 55639c133dd02..91a4ea529d077 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1669,7 +1669,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, struct npc_coalesced_kpu_prfl *img_data = NULL; int i = 0, rc = -EINVAL; void __iomem *kpu_prfl_addr; - u16 offset; + u32 offset; img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; if (le64_to_cpu(img_data->signature) == KPU_SIGN && -- GitLab From e7e7030f0ae29cdc70ed845144f1cabd509e29d5 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 28 Mar 2024 10:06:21 +0800 Subject: [PATCH 1521/2290] octeontx2-pf: check negative error code in otx2_open() commit e709acbd84fb6ef32736331b0147f027a3ef4c20 upstream. otx2_rxtx_enable() return negative error code such as -EIO, check -EIO rather than EIO to fix this problem. Fixes: c926252205c4 ("octeontx2-pf: Disable packet I/O for graceful exit") Signed-off-by: Su Hui Reviewed-by: Subbaraya Sundeep Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20240328020620.4054692-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 7e2c30927c312..6b7fb324e756e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1914,7 +1914,7 @@ int otx2_open(struct net_device *netdev) * mcam entries are enabled to receive the packets. Hence disable the * packet I/O. */ - if (err == EIO) + if (err == -EIO) goto err_disable_rxtx; else if (err) goto err_tx_stop_queues; -- GitLab From d417e3c16dc55b40ecd9371c00d3215ca3191552 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Thu, 28 Mar 2024 19:55:05 +0300 Subject: [PATCH 1522/2290] octeontx2-af: Add array index check commit ef15ddeeb6bee87c044bf7754fac524545bf71e8 upstream. In rvu_map_cgx_lmac_pf() the 'iter', which is used as an array index, can reach value (up to 14) that exceed the size (MAX_LMAC_COUNT = 8) of the array. Fix this bug by adding 'iter' value check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 91c6945ea1f9 ("octeontx2-af: cn10k: Add RPM MAC support") Signed-off-by: Aleksandr Mishin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index d1e6b12ecfa70..cc6d6c94f4002 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) continue; lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { + if (iter >= MAX_LMAC_COUNT) + continue; lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); -- GitLab From fe74ea5b8b78d96b9ba313f070b18833a4e32b32 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 13 Mar 2024 10:44:00 +0100 Subject: [PATCH 1523/2290] i40e: fix i40e_count_filters() to count only active/new filters commit eb58c598ce45b7e787568fe27016260417c3d807 upstream. The bug usually affects untrusted VFs, because they are limited to 18 MACs, it affects them badly, not letting to create MAC all filters. Not stable to reproduce, it happens when VF user creates MAC filters when other MACVLAN operations are happened in parallel. But consequence is that VF can't receive desired traffic. Fix counter to be bumped only for new or active filters. Fixes: 621650cabee5 ("i40e: Refactoring VF MAC filters counting to make more reliable") Signed-off-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Paul Menzel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d8a7fb21b7b76..5be56db1dafda 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1249,8 +1249,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) int bkt; int cnt = 0; - hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) - ++cnt; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } return cnt; } -- GitLab From 3e89846283f3cf7c7a8e28b342576fd7c561d2ba Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 13 Mar 2024 10:56:39 +0100 Subject: [PATCH 1524/2290] i40e: fix vf may be used uninitialized in this function warning commit f37c4eac99c258111d414d31b740437e1925b8e8 upstream. To fix the regression introduced by commit 52424f974bc5, which causes servers hang in very hard to reproduce conditions with resets races. Using two sources for the information is the root cause. In this function before the fix bumping v didn't mean bumping vf pointer. But the code used this variables interchangeably, so stale vf could point to different/not intended vf. Remove redundant "v" variable and iterate via single VF pointer across whole function instead to guarantee VF pointer validity. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 99cab36fb1075..a5f0c95cba8b5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1626,8 +1626,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; - int i, v; u32 reg; + int i; /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) @@ -1638,11 +1638,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is being reset no need to trigger reset again */ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - i40e_trigger_vf_reset(&pf->vf[v], flr); + i40e_trigger_vf_reset(vf, flr); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1651,14 +1650,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) * the VFs using a simple iterator that increments once that VF has * finished resetting. */ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { + for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { usleep_range(10000, 20000); /* Check each VF in sequence, beginning with the VF to fail * the previous check. */ - while (v < pf->num_alloc_vfs) { - vf = &pf->vf[v]; + while (vf < &pf->vf[pf->num_alloc_vfs]) { if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) @@ -1668,7 +1666,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If the current VF has finished resetting, move on * to the next VF in sequence. */ - v++; + ++vf; } } @@ -1678,39 +1676,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* Display a warning if at least one VF didn't manage to reset in * time, but continue on with the operation. */ - if (v < pf->num_alloc_vfs) + if (vf < &pf->vf[pf->num_alloc_vfs]) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - pf->vf[v].vf_id); + vf->vf_id); usleep_range(10000, 20000); /* Begin disabling all the rings associated with VFs, but do not wait * between each VF. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); } /* Now that we've notified HW to disable all of the VF rings, wait * until they finish. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); } /* Hw may need up to 50ms to finish disabling the RX queues. We @@ -1719,12 +1717,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_cleanup_reset_vf(&pf->vf[v]); + i40e_cleanup_reset_vf(vf); } i40e_flush(hw); -- GitLab From 2990d8eacd2ccd545feec181b49867fa0e68591d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 6 Oct 2023 13:50:20 -0500 Subject: [PATCH 1525/2290] drm/amd: Evict resources during PM ops prepare() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5095d5418193eb2748c7d8553c7150b8f1c44696 ] Linux PM core has a prepare() callback run before suspend. If the system is under high memory pressure, the resources may need to be evicted into swap instead. If the storage backing for swap is offlined during the suspend() step then such a call may fail. So move this step into prepare() to move evict majority of resources and update all non-pmops callers to call the same callback. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Stable-dep-of: ca299b4512d4 ("drm/amd: Flush GFXOFF requests in prepare stage") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e636c7850f777..dd22d2559720c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1342,6 +1342,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6a4749c0c5a58..902a446cc4d38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1639,6 +1639,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, } else { pr_info("switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + amdgpu_device_prepare(dev); amdgpu_device_suspend(dev, true); amdgpu_device_cache_pci_state(pdev); /* Shut down the device */ @@ -4167,6 +4168,31 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + /* Evict the majority of BOs before starting suspend sequence */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + + return 0; +} + /** * amdgpu_device_suspend - initiate device suspend * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f24c3a20e901d..9a5416331f02e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2391,8 +2391,9 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev); + if (amdgpu_device_supports_boco(drm_dev) && + pm_runtime_suspended(dev)) + return 1; /* if we will not support s3 or s2i for the device * then skip suspend @@ -2401,7 +2402,7 @@ static int amdgpu_pmops_prepare(struct device *dev) !amdgpu_acpi_is_s3_active(adev)) return 1; - return 0; + return amdgpu_device_prepare(drm_dev); } static void amdgpu_pmops_complete(struct device *dev) @@ -2600,6 +2601,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_boco(drm_dev)) adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_prepare(drm_dev); + if (ret) + return ret; ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; -- GitLab From 43df8e64dfb86940623bb23b2e20eb09b1791eaf Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 6 Oct 2023 13:50:21 -0500 Subject: [PATCH 1526/2290] drm/amd: Add concept of running prepare_suspend() sequence for IP blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb11ca3233aa3303dc11dca25977d2e7f24be00f ] If any IP blocks allocate memory during their hw_fini() sequence this can cause the suspend to fail under memory pressure. Introduce a new phase that IP blocks can use to allocate memory before suspend starts so that it can potentially be evicted into swap instead. Reviewed-by: Christian König Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Stable-dep-of: ca299b4512d4 ("drm/amd: Flush GFXOFF requests in prepare stage") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++++++++- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 902a446cc4d38..77e35b919b064 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4180,7 +4180,7 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) int amdgpu_device_prepare(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); - int r; + int i, r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -4190,6 +4190,16 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) return r; + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (!adev->ip_blocks[i].version->funcs->prepare_suspend) + continue; + r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); + if (r) + return r; + } + return 0; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index f175e65b853a0..34467427c9f97 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -294,6 +294,7 @@ struct amd_ip_funcs { int (*hw_init)(void *handle); int (*hw_fini)(void *handle); void (*late_fini)(void *handle); + int (*prepare_suspend)(void *handle); int (*suspend)(void *handle); int (*resume)(void *handle); bool (*is_idle)(void *handle); -- GitLab From 810dd068ae3b3973b769bd52866ca3b9286b5254 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 20 Mar 2024 13:32:21 -0500 Subject: [PATCH 1527/2290] drm/amd: Flush GFXOFF requests in prepare stage [ Upstream commit ca299b4512d4b4f516732a48ce9aa19d91f4473e ] If the system hasn't entered GFXOFF when suspend starts it can cause hangs accessing GC and RLC during the suspend stage. Cc: # 6.1.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.1.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.1.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.1.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.6.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.6.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.6.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.6.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.1+ Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132 Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 77e35b919b064..b11690a816e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4190,6 +4190,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) return r; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; -- GitLab From f3c2ceb847595d6c9f2b8bb9c786ba8027dbf1f6 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 7 Oct 2022 14:38:40 -0700 Subject: [PATCH 1528/2290] i40e: Store the irq number in i40e_q_vector [ Upstream commit 6b85a4f39ff7177b2428d4deab1151a31754e391 ] Make it easy to figure out the IRQ number for a particular i40e_q_vector by storing the assigned IRQ in the structure itself. Signed-off-by: Joe Damato Acked-by: Jesse Brandeburg Acked-by: Sridhar Samudrala Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Stable-dep-of: ea558de7238b ("i40e: Enforce software interrupt during busy-poll exit") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 7d4cc4eafd59e..59c4e9d642980 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -992,6 +992,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5be56db1dafda..ee0d7c29e8f17 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4145,6 +4145,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) } /* register for affinity change notifications */ + q_vector->irq_num = irq_num; q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); -- GitLab From 2f6953617d1c887524fbd66398b1d0a5efb1c289 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 13 Nov 2023 15:10:24 -0800 Subject: [PATCH 1529/2290] i40e: Remove _t suffix from enum type names [ Upstream commit addca9175e5f74cf29e8ad918c38c09b8663b5b8 ] Enum type names should not be suffixed by '_t'. Either to use 'typedef enum name name_t' to so plain 'name_t var' instead of 'enum name_t var'. Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20231113231047.548659-6-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Stable-dep-of: ea558de7238b ("i40e: Enforce software interrupt during busy-poll exit") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 6 +++--- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 59c4e9d642980..35862dedd59b5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -108,7 +108,7 @@ #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ /* driver state flags */ -enum i40e_state_t { +enum i40e_state { __I40E_TESTING, __I40E_CONFIG_BUSY, __I40E_CONFIG_DONE, @@ -156,7 +156,7 @@ enum i40e_state_t { BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ -enum i40e_vsi_state_t { +enum i40e_vsi_state { __I40E_VSI_DOWN, __I40E_VSI_NEEDS_RESTART, __I40E_VSI_SYNCING_FILTERS, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 97a9efe7b713e..5f2555848a69e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -34,7 +34,7 @@ enum i40e_ptp_pin { GPIO_4 }; -enum i40e_can_set_pins_t { +enum i40e_can_set_pins { CANT_DO_PINS = -1, CAN_SET_PINS, CAN_DO_PINS @@ -192,7 +192,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) * return CAN_DO_PINS if pins can be manipulated within a NIC or * return CANT_DO_PINS otherwise. **/ -static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) { if (!i40e_is_ptp_pin_dev(&pf->hw)) { dev_warn(&pf->pdev->dev, @@ -1081,7 +1081,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) static int i40e_ptp_set_pins(struct i40e_pf *pf, struct i40e_ptp_pins_settings *pins) { - enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); int i = 0; if (pin_caps == CANT_DO_PINS) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 768290dc6f48b..602ddd956245e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -57,7 +57,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any * register but instead is a special value meaning "don't update" ITR0/1/2. */ -enum i40e_dyn_idx_t { +enum i40e_dyn_idx { I40E_IDX_ITR0 = 0, I40E_IDX_ITR1 = 1, I40E_IDX_ITR2 = 2, @@ -304,7 +304,7 @@ struct i40e_rx_queue_stats { u64 page_busy_count; }; -enum i40e_ring_state_t { +enum i40e_ring_state { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, __I40E_RING_STATE_NBITS /* must be last */ -- GitLab From 095cfa2d9bde4ddf444fdf208ac3ace598a439de Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 16 Mar 2024 12:38:29 +0100 Subject: [PATCH 1530/2290] i40e: Enforce software interrupt during busy-poll exit [ Upstream commit ea558de7238bb12c3435c47f0631e9d17bf4a09f ] As for ice bug fixed by commit b7306b42beaf ("ice: manage interrupts during poll exit") followed by commit 23be7075b318 ("ice: fix software generating extra interrupts") I'm seeing the similar issue also with i40e driver. In certain situation when busy-loop is enabled together with adaptive coalescing, the driver occasionally misses that there are outstanding descriptors to clean when exiting busy poll. Try to catch the remaining work by triggering a software interrupt when exiting busy poll. No extra interrupts will be generated when busy polling is not used. The issue was found when running sockperf ping-pong tcp test with adaptive coalescing and busy poll enabled (50 as value busy_pool and busy_read sysctl knobs) and results in huge latency spikes with more than 100000us. The fix is inspired from the ice driver and do the following: 1) During napi poll exit in case of busy-poll (napo_complete_done() returns false) this is recorded to q_vector that we were in busy loop. 2) Extends i40e_buildreg_itr() to be able to add an enforced software interrupt into built value 2) In i40e_update_enable_itr() enforces a software interrupt trigger if we are exiting busy poll to catch any pending clean-ups 3) Reuses unused 3rd ITR (interrupt throttle) index and set it to 20K interrupts per second to limit the number of these sw interrupts. Test results ============ Prior: [root@dell-per640-07 net]# sockperf ping-pong -i 10.9.9.1 --tcp -m 1000 --mps=max -t 120 sockperf: == version #3.10-no.git == sockperf[CLIENT] send on:sockperf: using recvfrom() to block on socket(s) [ 0] IP = 10.9.9.1 PORT = 11111 # TCP sockperf: Warmup stage (sending a few dummy messages)... sockperf: Starting test... sockperf: Test end (interrupted by timer) sockperf: Test ended sockperf: [Total Run] RunTime=119.999 sec; Warm up time=400 msec; SentMessages=2438563; ReceivedMessages=2438562 sockperf: ========= Printing statistics for Server No: 0 sockperf: [Valid Duration] RunTime=119.549 sec; SentMessages=2429473; ReceivedMessages=2429473 sockperf: ====> avg-latency=24.571 (std-dev=93.297, mean-ad=4.904, median-ad=1.510, siqr=1.063, cv=3.797, std-error=0.060, 99.0% ci=[24.417, 24.725]) sockperf: # dropped messages = 0; # duplicated messages = 0; # out-of-order messages = 0 sockperf: Summary: Latency is 24.571 usec sockperf: Total 2429473 observations; each percentile contains 24294.73 observations sockperf: ---> observation = 103294.331 sockperf: ---> percentile 99.999 = 45.633 sockperf: ---> percentile 99.990 = 37.013 sockperf: ---> percentile 99.900 = 35.910 sockperf: ---> percentile 99.000 = 33.390 sockperf: ---> percentile 90.000 = 28.626 sockperf: ---> percentile 75.000 = 27.741 sockperf: ---> percentile 50.000 = 26.743 sockperf: ---> percentile 25.000 = 25.614 sockperf: ---> observation = 12.220 After: [root@dell-per640-07 net]# sockperf ping-pong -i 10.9.9.1 --tcp -m 1000 --mps=max -t 120 sockperf: == version #3.10-no.git == sockperf[CLIENT] send on:sockperf: using recvfrom() to block on socket(s) [ 0] IP = 10.9.9.1 PORT = 11111 # TCP sockperf: Warmup stage (sending a few dummy messages)... sockperf: Starting test... sockperf: Test end (interrupted by timer) sockperf: Test ended sockperf: [Total Run] RunTime=119.999 sec; Warm up time=400 msec; SentMessages=2400055; ReceivedMessages=2400054 sockperf: ========= Printing statistics for Server No: 0 sockperf: [Valid Duration] RunTime=119.549 sec; SentMessages=2391186; ReceivedMessages=2391186 sockperf: ====> avg-latency=24.965 (std-dev=5.934, mean-ad=4.642, median-ad=1.485, siqr=1.067, cv=0.238, std-error=0.004, 99.0% ci=[24.955, 24.975]) sockperf: # dropped messages = 0; # duplicated messages = 0; # out-of-order messages = 0 sockperf: Summary: Latency is 24.965 usec sockperf: Total 2391186 observations; each percentile contains 23911.86 observations sockperf: ---> observation = 195.841 sockperf: ---> percentile 99.999 = 45.026 sockperf: ---> percentile 99.990 = 39.009 sockperf: ---> percentile 99.900 = 35.922 sockperf: ---> percentile 99.000 = 33.482 sockperf: ---> percentile 90.000 = 28.902 sockperf: ---> percentile 75.000 = 27.821 sockperf: ---> percentile 50.000 = 26.860 sockperf: ---> percentile 25.000 = 25.685 sockperf: ---> observation = 12.277 Fixes: 0bcd952feec7 ("ethernet/intel: consolidate NAPI and NAPI exit") Reported-by: Hugo Ferreira Reviewed-by: Michal Schmidt Signed-off-by: Ivan Vecera Reviewed-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++ .../net/ethernet/intel/i40e/i40e_register.h | 3 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 82 ++++++++++++++----- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 + 5 files changed, 72 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 35862dedd59b5..5293fc00938cf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -992,6 +992,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + bool in_busy_poll; int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ee0d7c29e8f17..a9db1ed74d3fc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3891,6 +3891,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. + */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 7339003aa17cd..694cb3e45c1ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -328,8 +328,11 @@ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 +#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 94cf82668efaa..3d83fccf742b1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2571,7 +2571,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static inline u32 i40e_buildreg_itr(const int type, u16 itr) +/** + * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register + * @itr_idx: interrupt throttling index + * @interval: interrupt throttling interval value in usecs + * @force_swint: force software interrupt + * + * The function builds a value for I40E_PFINT_DYN_CTLN register that + * is used to update interrupt throttling interval for specified ITR index + * and optionally enforces a software interrupt. If the @itr_idx is equal + * to I40E_ITR_NONE then no interval change is applied and only @force_swint + * parameter is taken into account. If the interval change and enforced + * software interrupt are not requested then the built value just enables + * appropriate vector interrupt. + **/ +static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, + bool force_swint) { u32 val; @@ -2585,23 +2600,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) * an event in the PBA anyway so we need to rely on the automask * to hold pending events for us until the interrupt is re-enabled * - * The itr value is reported in microseconds, and the register - * value is recorded in 2 microsecond units. For this reason we - * only need to shift by the interval shift - 1 instead of the - * full value. + * We have to shift the given value as it is reported in microseconds + * and the register value is recorded in 2 microsecond units. */ - itr &= I40E_ITR_MASK; + interval >>= 1; + /* 1. Enable vector interrupt + * 2. Update the interval for the specified ITR index + * (I40E_ITR_NONE in the register is used to indicate that + * no interval update is requested) + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); + FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | + FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); + + /* 3. Enforce software interrupt trigger if requested + * (These software interrupts rate is limited by ITR2 that is + * set to 20K interrupts per second) + */ + if (force_swint) + val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | + FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, + I40E_SW_ITR); return val; } -/* a small macro to shorten up some long lines */ -#define INTREG I40E_PFINT_DYN_CTLN - /* The act of updating the ITR will cause it to immediately trigger. In order * to prevent this from throwing off adaptive update statistics we defer the * update so that it can only happen so often. So after either Tx or Rx are @@ -2620,8 +2645,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { + enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; struct i40e_hw *hw = &vsi->back->hw; - u32 intval; + u16 interval = 0; + u32 itr_val; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2643,8 +2670,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ if (q_vector->rx.target_itr < q_vector->rx.current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || @@ -2653,25 +2680,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); + itr_idx = I40E_TX_ITR; + interval = q_vector->tx.target_itr; q_vector->tx.current_itr = q_vector->tx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown) q_vector->itr_countdown--; } - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); + /* Do not update interrupt control register if VSI is down */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + /* Update ITR interval if necessary and enforce software interrupt + * if we are exiting busy poll. + */ + if (q_vector->in_busy_poll) { + itr_val = i40e_buildreg_itr(itr_idx, interval, true); + q_vector->in_busy_poll = false; + } else { + itr_val = i40e_buildreg_itr(itr_idx, interval, false); + } + wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); } /** @@ -2778,6 +2816,8 @@ tx_only: */ if (likely(napi_complete_done(napi, work_done))) i40e_update_enable_itr(vsi, q_vector); + else + q_vector->in_busy_poll = true; return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 602ddd956245e..6e567d343e031 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -67,6 +67,7 @@ enum i40e_dyn_idx { /* these are indexes into ITRN registers */ #define I40E_RX_ITR I40E_IDX_ITR0 #define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_SW_ITR I40E_IDX_ITR2 /* Supported RSS offloads */ #define I40E_DEFAULT_RSS_HENA ( \ -- GitLab From a33b7cb184921487ae7b8b739bcdc7d0266d2eb7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:23:15 +0100 Subject: [PATCH 1531/2290] r8169: use spinlock to protect mac ocp register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 91c8643578a21e435c412ffbe902bb4b4773e262 ] For disabling ASPM during NAPI poll we'll have to access mac ocp registers in atomic context. This could result in races because a mac ocp read consists of a write to register OCPDR, followed by a read from the same register. Therefore add a spinlock to protect access to mac ocp registers. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Stable-dep-of: 5e864d90b208 ("r8169: skip DASH fw status checks when DASH is disabled") Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 37 ++++++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 3dbcb311dcbf2..c7dd0eb94817f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -615,6 +615,8 @@ struct rtl8169_private { struct work_struct work; } wk; + spinlock_t mac_ocp_lock; + unsigned supports_gmii:1; unsigned aspm_manageable:1; unsigned dash_enabled:1; @@ -850,7 +852,7 @@ static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) (RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT; } -static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) +static void __r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) { if (rtl_ocp_reg_failure(reg)) return; @@ -858,7 +860,16 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data); } -static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) +static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) +{ + unsigned long flags; + + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + __r8168_mac_ocp_write(tp, reg, data); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); +} + +static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) { if (rtl_ocp_reg_failure(reg)) return 0; @@ -868,12 +879,28 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) return RTL_R32(tp, OCPDR); } +static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) +{ + unsigned long flags; + u16 val; + + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + val = __r8168_mac_ocp_read(tp, reg); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); + + return val; +} + static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask, u16 set) { - u16 data = r8168_mac_ocp_read(tp, reg); + unsigned long flags; + u16 data; - r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + data = __r8168_mac_ocp_read(tp, reg); + __r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); } /* Work around a hw issue with RTL8168g PHY, the quirk disables @@ -5232,6 +5259,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->mac_ocp_lock); + dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, struct pcpu_sw_netstats); if (!dev->tstats) -- GitLab From 9109472e7102f58b915e473dabc8b7961a39b3f7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:24:00 +0100 Subject: [PATCH 1532/2290] r8169: use spinlock to protect access to registers Config2 and Config5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6bc6c4e6893ee79a9862c61d1635e7da6d5a3333 ] For disabling ASPM during NAPI poll we'll have to access both registers in atomic context. Use a spinlock to protect access. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Stable-dep-of: 5e864d90b208 ("r8169: skip DASH fw status checks when DASH is disabled") Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 47 ++++++++++++++++++----- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c7dd0eb94817f..4a1710b2726ce 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -615,6 +615,7 @@ struct rtl8169_private { struct work_struct work; } wk; + spinlock_t config25_lock; spinlock_t mac_ocp_lock; unsigned supports_gmii:1; @@ -680,6 +681,28 @@ static void rtl_pci_commit(struct rtl8169_private *tp) RTL_R8(tp, ChipCmd); } +static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->config25_lock, flags); + val = RTL_R8(tp, Config2); + RTL_W8(tp, Config2, (val & ~clear) | set); + spin_unlock_irqrestore(&tp->config25_lock, flags); +} + +static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->config25_lock, flags); + val = RTL_R8(tp, Config5); + RTL_W8(tp, Config5, (val & ~clear) | set); + spin_unlock_irqrestore(&tp->config25_lock, flags); +} + static bool rtl_is_8125(struct rtl8169_private *tp) { return tp->mac_version >= RTL_GIGA_MAC_VER_61; @@ -1401,6 +1424,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) { WAKE_MAGIC, Config3, MagicPacket } }; unsigned int i, tmp = ARRAY_SIZE(cfg); + unsigned long flags; u8 options; rtl_unlock_config_regs(tp); @@ -1419,12 +1443,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0); } + spin_lock_irqsave(&tp->config25_lock, flags); for (i = 0; i < tmp; i++) { options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask; if (wolopts & cfg[i].opt) options |= cfg[i].mask; RTL_W8(tp, cfg[i].reg, options); } + spin_unlock_irqrestore(&tp->config25_lock, flags); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: @@ -1436,10 +1462,10 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63: - options = RTL_R8(tp, Config2) & ~PME_SIGNAL; if (wolopts) - options |= PME_SIGNAL; - RTL_W8(tp, Config2, options); + rtl_mod_config2(tp, 0, PME_SIGNAL); + else + rtl_mod_config2(tp, PME_SIGNAL, 0); break; default: break; @@ -2748,8 +2774,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { - RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); - RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); + rtl_mod_config5(tp, 0, ASPM_en); + rtl_mod_config2(tp, 0, ClkReqEn); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: @@ -2772,8 +2798,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) break; } - RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en); + rtl_mod_config2(tp, ClkReqEn, 0); + rtl_mod_config5(tp, ASPM_en, 0); } udelay(10); @@ -2934,7 +2960,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); } static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) @@ -2967,7 +2993,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -2990,7 +3016,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); rtl8168_config_eee_mac(tp); } @@ -5259,6 +5285,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->config25_lock); spin_lock_init(&tp->mac_ocp_lock); dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, -- GitLab From 77db987b47b7c93d6e7eadec5c211f3afcea42dd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:25:49 +0100 Subject: [PATCH 1533/2290] r8169: prepare rtl_hw_aspm_clkreq_enable for usage in atomic context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 49ef7d846d4bd77b0b9f1f801fc765b004690a07 ] Bail out if the function is used with chip versions that don't support ASPM configuration. In addition remove the delay, it tuned out that it's not needed, also vendor driver r8125 doesn't have it. Suggested-by: Kai-Heng Feng Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Stable-dep-of: 5e864d90b208 ("r8169: skip DASH fw status checks when DASH is disabled") Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4a1710b2726ce..256630f57ffe1 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2772,6 +2772,9 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp) static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { + if (tp->mac_version < RTL_GIGA_MAC_VER_32) + return; + /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { rtl_mod_config5(tp, 0, ASPM_en); @@ -2801,8 +2804,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config2(tp, ClkReqEn, 0); rtl_mod_config5(tp, ASPM_en, 0); } - - udelay(10); } static void rtl_set_fifo_size(struct rtl8169_private *tp, u16 rx_stat, -- GitLab From 4eed9d0a4816d81bf6b009a8d3a05f3817b9463e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 26 Mar 2024 13:42:45 -0700 Subject: [PATCH 1534/2290] tcp: Fix bind() regression for v6-only wildcard and v4(-mapped-v6) non-wildcard addresses. [ Upstream commit d91ef1e1b55f730bee8ce286b02b7bdccbc42973 ] Jianguo Wu reported another bind() regression introduced by bhash2. Calling bind() for the following 3 addresses on the same port, the 3rd one should fail but now succeeds. 1. 0.0.0.0 or ::ffff:0.0.0.0 2. [::] w/ IPV6_V6ONLY 3. IPv4 non-wildcard address or v4-mapped-v6 non-wildcard address The first two bind() create tb2 like this: bhash2 -> tb2(:: w/ IPV6_V6ONLY) -> tb2(0.0.0.0) The 3rd bind() will match with the IPv6 only wildcard address bucket in inet_bind2_bucket_match_addr_any(), however, no conflicting socket exists in the bucket. So, inet_bhash2_conflict() will returns false, and thus, inet_bhash2_addr_any_conflict() returns false consequently. As a result, the 3rd bind() bypasses conflict check, which should be done against the IPv4 wildcard address bucket. So, in inet_bhash2_addr_any_conflict(), we must iterate over all buckets. Note that we cannot add ipv6_only flag for inet_bind2_bucket as it would confuse the following patetrn. 1. [::] w/ SO_REUSE{ADDR,PORT} and IPV6_V6ONLY 2. [::] w/ SO_REUSE{ADDR,PORT} 3. IPv4 non-wildcard address or v4-mapped-v6 non-wildcard address The first bind() would create a bucket with ipv6_only flag true, the second bind() would add the [::] socket into the same bucket, and the third bind() could succeed based on the wrong assumption that ipv6_only bucket would not conflict with v4(-mapped-v6) address. Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Diagnosed-by: Jianguo Wu Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240326204251.51301-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f7832d4253820..8407098a59391 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l struct sock_reuseport *reuseport_cb; struct inet_bind_hashbucket *head2; struct inet_bind2_bucket *tb2; + bool conflict = false; bool reuseport_cb_ok; rcu_read_lock(); @@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l spin_lock(&head2->lock); - inet_bind_bucket_for_each(tb2, &head2->chain) - if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) - break; + inet_bind_bucket_for_each(tb2, &head2->chain) { + if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) + continue; - if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok)) { - spin_unlock(&head2->lock); - return true; + if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; + + conflict = true; + break; } spin_unlock(&head2->lock); - return false; + + return conflict; } /* -- GitLab From c1781222079cac0510a276194ca0a18fe12b8a80 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Thu, 27 Oct 2022 21:45:02 +0300 Subject: [PATCH 1535/2290] drivers: net: convert to boolean for the mac_managed_pm flag [ Upstream commit eca485d22165695587bed02d8b9d0f7f44246c4a ] Signed-off-by: Dennis Kirjanov Signed-off-by: David S. Miller Stable-dep-of: cbc17e7802f5 ("net: fec: Set mac_managed_pm during probe") Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/realtek/r8169_main.c | 2 +- drivers/net/usb/asix_devices.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 97d12c7eea772..51eb30c20c7cf 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2236,7 +2236,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = 1; + phy_dev->mac_managed_pm = true; phy_attached_info(phy_dev); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 256630f57ffe1..6e3417712e402 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5148,7 +5148,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp) return -EUNATCH; } - tp->phydev->mac_managed_pm = 1; + tp->phydev->mac_managed_pm = true; phy_support_asym_pause(tp->phydev); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6eacbf17f1c0c..34cd568b27f19 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -714,7 +714,7 @@ static int ax88772_init_phy(struct usbnet *dev) } phy_suspend(priv->phydev); - priv->phydev->mac_managed_pm = 1; + priv->phydev->mac_managed_pm = true; phy_attached_info(priv->phydev); @@ -734,7 +734,7 @@ static int ax88772_init_phy(struct usbnet *dev) return -ENODEV; } - priv->phydev_int->mac_managed_pm = 1; + priv->phydev_int->mac_managed_pm = true; phy_suspend(priv->phydev_int); return 0; -- GitLab From b3608fe28fab298ee2bcdde26bdaf9c179e8c69f Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 28 Mar 2024 15:59:29 +0000 Subject: [PATCH 1536/2290] net: fec: Set mac_managed_pm during probe [ Upstream commit cbc17e7802f5de37c7c262204baadfad3f7f99e5 ] Setting mac_managed_pm during interface up is too late. In situations where the link is not brought up yet and the system suspends the regular PHY power management will run. Since the FEC ETHEREN control bit is cleared (automatically) on suspend the controller is off in resume. When the regular PHY power management resume path runs in this context it will write to the MII_DATA register but nothing will be transmitted on the MDIO bus. This can be observed by the following log: fec 5b040000.ethernet eth0: MDIO read timeout Microchip LAN87xx T1 5b040000.ethernet-1:04: PM: dpm_run_callback(): mdio_bus_phy_resume+0x0/0xc8 returns -110 Microchip LAN87xx T1 5b040000.ethernet-1:04: PM: failed to resume: error -110 The data written will however remain in the MII_DATA register. When the link later is set to administrative up it will trigger a call to fec_restart() which will restore the MII_SPEED register. This triggers the quirk explained in f166f890c8f0 ("net: ethernet: fec: Replace interrupt driven MDIO with polled IO") causing an extra MII_EVENT. This extra event desynchronizes all the MDIO register reads, causing them to complete too early. Leading all reads to read as 0 because fec_enet_mdio_wait() returns too early. When a Microchip LAN8700R PHY is connected to the FEC, the 0 reads causes the PHY to be initialized incorrectly and the PHY will not transmit any ethernet signal in this state. It cannot be brought out of this state without a power cycle of the PHY. Fixes: 557d5dc83f68 ("net: fec: use mac-managed PHY PM") Closes: https://lore.kernel.org/netdev/1f45bdbe-eab1-4e59-8f24-add177590d27@actia.se/ Signed-off-by: Wei Fang [jernberg: commit message] Signed-off-by: John Ernberg Link: https://lore.kernel.org/r/20240328155909.59613-2-john.ernberg@actia.se Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 51eb30c20c7cf..ebff14b0837d9 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2236,8 +2236,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = true; - phy_attached_info(phy_dev); return 0; @@ -2249,10 +2247,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); bool suppress_preamble = false; + struct phy_device *phydev; struct device_node *node; int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; + int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2362,6 +2362,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) goto err_out_free_mdiobus; of_node_put(node); + /* find all the PHY devices on the bus and set mac_managed_pm to true */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + phydev = mdiobus_get_phy(fep->mii_bus, addr); + if (phydev) + phydev->mac_managed_pm = true; + } + mii_cnt++; /* save fec0 mii_bus */ -- GitLab From 265a0fc55f137823c10005c4bce2d235d3a4f4ab Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 2 Feb 2024 10:41:22 +0200 Subject: [PATCH 1537/2290] net: ravb: Let IP-specific receive function to interrogate descriptors [ Upstream commit 2b993bfdb47b3aaafd8fe9cd5038b5e297b18ee1 ] ravb_poll() initial code used to interrogate the first descriptor of the RX queue in case gPTP is false to determine if ravb_rx() should be called. This is done for non-gPTP IPs. For gPTP IPs the driver PTP-specific information was used to determine if receive function should be called. As every IP has its own receive function that interrogates the RX descriptors list in the same way the ravb_poll() was doing there is no need to double check this in ravb_poll(). Removing the code from ravb_poll() leads to a cleaner code. Signed-off-by: Claudiu Beznea Reviewed-by: Sergey Shtylyov Signed-off-by: Paolo Abeni Stable-dep-of: 596a4254915f ("net: ravb: Always process TX descriptor ring") Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index e7b70006261f7..36e2718c564ac 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1290,25 +1290,16 @@ static int ravb_poll(struct napi_struct *napi, int budget) struct net_device *ndev = napi->dev; struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - bool gptp = info->gptp || info->ccc_gac; - struct ravb_rx_desc *desc; unsigned long flags; int q = napi - priv->napi; int mask = BIT(q); int quota = budget; - unsigned int entry; - if (!gptp) { - entry = priv->cur_rx[q] % priv->num_rx_ring[q]; - desc = &priv->gbeth_rx_ring[entry]; - } /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (gptp || desc->die_dt != DT_FEMPTY) { - if (ravb_rx(ndev, "a, q)) - goto out; - } + if (ravb_rx(ndev, "a, q)) + goto out; /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); -- GitLab From f9690dfa181434d828f7583e83d3176116f88566 Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Tue, 2 Apr 2024 15:53:04 +0100 Subject: [PATCH 1538/2290] net: ravb: Always process TX descriptor ring [ Upstream commit 596a4254915f94c927217fe09c33a6828f33fb25 ] The TX queue should be serviced each time the poll function is called, even if the full RX work budget has been consumed. This prevents starvation of the TX queue when RX bandwidth usage is high. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Paul Barker Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240402145305.82148-1-paul.barker.ct@bp.renesas.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36e2718c564ac..b11cc365d19e3 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1294,12 +1294,12 @@ static int ravb_poll(struct napi_struct *napi, int budget) int q = napi - priv->napi; int mask = BIT(q); int quota = budget; + bool unmask; /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, "a, q)) - goto out; + unmask = !ravb_rx(ndev, "a, q); /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); @@ -1309,6 +1309,9 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + if (!unmask) + goto out; + napi_complete(napi); /* Re-enable RX/TX interrupts */ -- GitLab From f089d4554a0b22860bfb1b8392f1aed32033245f Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Tue, 2 Apr 2024 15:53:05 +0100 Subject: [PATCH 1539/2290] net: ravb: Always update error counters [ Upstream commit 101b76418d7163240bc74a7e06867dca0e51183e ] The error statistics should be updated each time the poll function is called, even if the full RX work budget has been consumed. This prevents the counts from becoming stuck when RX bandwidth usage is high. This also ensures that error counters are not updated after we've re-enabled interrupts as that could result in a race condition. Also drop an unnecessary space. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Paul Barker Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240402145305.82148-2-paul.barker.ct@bp.renesas.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b11cc365d19e3..756ac4a07f60b 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1309,6 +1309,15 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + /* Receive error message handling */ + priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; + if (info->nc_queues) + priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; + if (priv->rx_over_errors != ndev->stats.rx_over_errors) + ndev->stats.rx_over_errors = priv->rx_over_errors; + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) + ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; + if (!unmask) goto out; @@ -1325,14 +1334,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - /* Receive error message handling */ - priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; - if (info->nc_queues) - priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) - ndev->stats.rx_over_errors = priv->rx_over_errors; - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) - ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; out: return budget - quota; } -- GitLab From 2f7efda53a0a8b462fdd217e732147d7611befe2 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 22 May 2023 18:12:48 +0200 Subject: [PATCH 1540/2290] KVM: SVM: enhance info printk's in SEV init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d1bc9754b04075d938b47cf7f7800814b8911a7 ] Let's print available ASID ranges for SEV/SEV-ES guests. This information can be useful for system administrator to debug if SEV/SEV-ES fails to enable. There are a few reasons. SEV: - NPT is disabled (module parameter) - CPU lacks some features (sev, decodeassists) - Maximum SEV ASID is 0 SEV-ES: - mmio_caching is disabled (module parameter) - CPU lacks sev_es feature - Minimum SEV ASID value is 1 (can be adjusted in BIOS/UEFI) Cc: Sean Christopherson Cc: Paolo Bonzini Cc: Stéphane Graber Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20230522161249.800829-3-aleksandr.mikhalitsyn@canonical.com [sean: print '0' for min SEV-ES ASID if there are no available ASIDs] Signed-off-by: Sean Christopherson Stable-dep-of: 0aa6b90ef9d7 ("KVM: SVM: Add support for allowing zero SEV ASIDs") Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3dc0ee1fe9db9..1fe9257d87b2d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2217,7 +2217,6 @@ void __init sev_hardware_setup(void) if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)) goto out; - pr_info("SEV supported: %u ASIDs\n", sev_asid_count); sev_supported = true; /* SEV-ES support requested? */ @@ -2245,10 +2244,18 @@ void __init sev_hardware_setup(void) if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)) goto out; - pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count); sev_es_supported = true; out: + if (boot_cpu_has(X86_FEATURE_SEV)) + pr_info("SEV %s (ASIDs %u - %u)\n", + sev_supported ? "enabled" : "disabled", + min_sev_asid, max_sev_asid); + if (boot_cpu_has(X86_FEATURE_SEV_ES)) + pr_info("SEV-ES %s (ASIDs %u - %u)\n", + sev_es_supported ? "enabled" : "disabled", + min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); + sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; #endif -- GitLab From 815c2a1c432b072c7cb9f9fde69cbf093ecd34b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Jun 2023 17:44:49 -0700 Subject: [PATCH 1541/2290] KVM: SVM: WARN, but continue, if misc_cg_set_capacity() fails [ Upstream commit 106ed2cad9f7bd803bd31a18fe7a9219b077bf95 ] WARN and continue if misc_cg_set_capacity() fails, as the only scenario in which it can fail is if the specified resource is invalid, which should never happen when CONFIG_KVM_AMD_SEV=y. Deliberately not bailing "fixes" a theoretical bug where KVM would leak the ASID bitmaps on failure, which again can't happen. If the impossible should happen, the end result is effectively the same with respect to SEV and SEV-ES (they are unusable), while continuing on has the advantage of letting KVM load, i.e. userspace can still run non-SEV guests. Reported-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20230607004449.1421131-1-seanjc@google.com Signed-off-by: Sean Christopherson Stable-dep-of: 0aa6b90ef9d7 ("KVM: SVM: Add support for allowing zero SEV ASIDs") Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1fe9257d87b2d..0316fdf5040f7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2214,9 +2214,7 @@ void __init sev_hardware_setup(void) } sev_asid_count = max_sev_asid - min_sev_asid + 1; - if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)) - goto out; - + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); sev_supported = true; /* SEV-ES support requested? */ @@ -2241,9 +2239,7 @@ void __init sev_hardware_setup(void) goto out; sev_es_asid_count = min_sev_asid - 1; - if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)) - goto out; - + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)); sev_es_supported = true; out: -- GitLab From 2233bd583cb5e9175c75d54686d77bbdd56e09e2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Jan 2024 15:56:07 -0800 Subject: [PATCH 1542/2290] KVM: SVM: Use unsigned integers when dealing with ASIDs [ Upstream commit 466eec4a22a76c462781bf6d45cb02cbedf21a61 ] Convert all local ASID variables and parameters throughout the SEV code from signed integers to unsigned integers. As ASIDs are fundamentally unsigned values, and the global min/max variables are appropriately unsigned integers, too. Functionally, this is a glorified nop as KVM guarantees min_sev_asid is non-zero, and no CPU supports -1u as the _only_ asid, i.e. the signed vs. unsigned goof won't cause problems in practice. Opportunistically use sev_get_asid() in sev_flush_encrypted_page() instead of open coding an equivalent. Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240131235609.4161407-3-seanjc@google.com Signed-off-by: Sean Christopherson Stable-dep-of: 0aa6b90ef9d7 ("KVM: SVM: Add support for allowing zero SEV ASIDs") Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 18 ++++++++++-------- arch/x86/kvm/trace.h | 10 +++++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0316fdf5040f7..9e50eaf967f22 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -76,9 +76,10 @@ struct enc_region { }; /* Called with the sev_bitmap_lock held, or on shutdown */ -static int sev_flush_asids(int min_asid, int max_asid) +static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { - int ret, asid, error = 0; + int ret, error = 0; + unsigned int asid; /* Check if there are any ASIDs to reclaim before performing a flush */ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); @@ -108,7 +109,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(int min_asid, int max_asid) +static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { if (sev_flush_asids(min_asid, max_asid)) return false; @@ -135,8 +136,9 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - int asid, min_asid, max_asid, ret; + unsigned int asid, min_asid, max_asid; bool retry = true; + int ret; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); @@ -179,7 +181,7 @@ e_uncharge: return ret; } -static int sev_get_asid(struct kvm *kvm) +static unsigned int sev_get_asid(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -276,8 +278,8 @@ e_no_asid: static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { + unsigned int asid = sev_get_asid(kvm); struct sev_data_activate activate; - int asid = sev_get_asid(kvm); int ret; /* activate ASID on the given handle */ @@ -2290,7 +2292,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) */ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { - int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + unsigned int asid = sev_get_asid(vcpu->kvm); /* * Note! The address must be a kernel address, as regular page walk @@ -2611,7 +2613,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - int asid = sev_get_asid(svm->vcpu.kvm); + unsigned int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ svm->asid = asid; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index bc25589ad5886..6c1dcf44c4fa3 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -729,13 +729,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, - TP_PROTO(__u64 rip, int asid, u64 address), + TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( - __field( __u64, rip ) - __field( int, asid ) - __field( __u64, address ) + __field( __u64, rip ) + __field( unsigned int, asid ) + __field( __u64, address ) ), TP_fast_assign( @@ -744,7 +744,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); -- GitLab From 4af6d5b4d9f1769b2d95d2a96cc270274b91337e Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Wed, 31 Jan 2024 15:56:08 -0800 Subject: [PATCH 1543/2290] KVM: SVM: Add support for allowing zero SEV ASIDs [ Upstream commit 0aa6b90ef9d75b4bd7b6d106d85f2a3437697f91 ] Some BIOSes allow the end user to set the minimum SEV ASID value (CPUID 0x8000001F_EDX) to be greater than the maximum number of encrypted guests, or maximum SEV ASID value (CPUID 0x8000001F_ECX) in order to dedicate all the SEV ASIDs to SEV-ES or SEV-SNP. The SEV support, as coded, does not handle the case where the minimum SEV ASID value can be greater than the maximum SEV ASID value. As a result, the following confusing message is issued: [ 30.715724] kvm_amd: SEV enabled (ASIDs 1007 - 1006) Fix the support to properly handle this case. Fixes: 916391a2d1dc ("KVM: SVM: Add support for SEV-ES capability in KVM") Suggested-by: Sean Christopherson Signed-off-by: Ashish Kalra Cc: stable@vger.kernel.org Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/20240104190520.62510-1-Ashish.Kalra@amd.com Link: https://lore.kernel.org/r/20240131235609.4161407-4-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9e50eaf967f22..d8e192ad59538 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -136,10 +136,21 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - unsigned int asid, min_asid, max_asid; + /* + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. + * Note: min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + */ + unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; + unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + unsigned int asid; bool retry = true; int ret; + if (min_asid > max_asid) + return -ENOTTY; + WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); ret = sev_misc_cg_try_charge(sev); @@ -151,12 +162,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) mutex_lock(&sev_bitmap_lock); - /* - * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. - * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - */ - min_asid = sev->es_active ? 1 : min_sev_asid; - max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); if (asid > max_asid) { @@ -2215,8 +2220,10 @@ void __init sev_hardware_setup(void) goto out; } - sev_asid_count = max_sev_asid - min_sev_asid + 1; - WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + if (min_sev_asid <= max_sev_asid) { + sev_asid_count = max_sev_asid - min_sev_asid + 1; + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + } sev_supported = true; /* SEV-ES support requested? */ @@ -2247,7 +2254,9 @@ void __init sev_hardware_setup(void) out: if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", - sev_supported ? "enabled" : "disabled", + sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : + "unusable" : + "disabled", min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", -- GitLab From 90a477dfda3be83709e1f761710e09835f51b49f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 24 Nov 2023 16:08:22 +0100 Subject: [PATCH 1544/2290] fs/pipe: Fix lockdep false-positive in watchqueue pipe_write() [ Upstream commit 055ca83559912f2cfd91c9441427bac4caf3c74e ] When you try to splice between a normal pipe and a notification pipe, get_pipe_info(..., true) fails, so splice() falls back to treating the notification pipe like a normal pipe - so we end up in iter_file_splice_write(), which first locks the input pipe, then calls vfs_iter_write(), which locks the output pipe. Lockdep complains about that, because we're taking a pipe lock while already holding another pipe lock. I think this probably (?) can't actually lead to deadlocks, since you'd need another way to nest locking a normal pipe into locking a watch_queue pipe, but the lockdep annotations don't make that clear. Bail out earlier in pipe_write() for notification pipes, before taking the pipe lock. Reported-and-tested-by: Closes: https://syzkaller.appspot.com/bug?extid=011e4ea1da6692cf881c Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20231124150822.2121798-1-jannh@google.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/pipe.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 9873a6030df56..aa8e6ffe1cb58 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -424,6 +424,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) bool was_empty = false; bool wake_next_writer = false; + /* + * Reject writing to watch queue pipes before the point where we lock + * the pipe. + * Otherwise, lockdep would be unhappy if the caller already has another + * pipe locked. + * If we had to support locking a normal pipe and a notification pipe at + * the same time, we could set up lockdep annotations for that, but + * since we don't actually need that, it's simpler to just bail here. + */ + if (pipe_has_watch_queue(pipe)) + return -EXDEV; + /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; @@ -436,11 +448,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } - if (pipe_has_watch_queue(pipe)) { - ret = -EXDEV; - goto out; - } - /* * If it wasn't empty we try to merge new data into * the last buffer. -- GitLab From d00c24ddec511f73bc5f91b86a114f36ab508076 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 9 Jan 2024 12:39:03 +0900 Subject: [PATCH 1545/2290] 9p: Fix read/write debug statements to report server reply [ Upstream commit be3193e58ec210b2a72fb1134c2a0695088a911d ] Previous conversion to iov missed these debug statements which would now always print the requested size instead of the actual server reply. Write also added a loop in a much older commit but we didn't report these, while reads do report each iteration -- it's more coherent to keep reporting all requests to server so move that at the same time. Fixes: 7f02464739da ("9p: convert to advancing variant of iov_iter_get_pages_alloc()") Signed-off-by: Dominique Martinet Message-ID: <20240109-9p-rw-trace-v1-1-327178114257@codewreck.org> Signed-off-by: Sasha Levin --- net/9p/client.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 84b93b04d0f06..1d9a8a1f3f107 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1581,7 +1581,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, received = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1607,9 +1607,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) int total = 0; *err = 0; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", - fid->fid, offset, iov_iter_count(from)); - while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; @@ -1621,6 +1618,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + fid->fid, offset, rsize, count); + /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, @@ -1648,7 +1648,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) written = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); -- GitLab From 81f7c9da2bed3df06fd4c6a1ae8034001b1ed959 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 12 Mar 2024 01:20:53 +0000 Subject: [PATCH 1546/2290] drivers/perf: riscv: Disable PERF_SAMPLE_BRANCH_* while not supported [ Upstream commit ea6873118493019474abbf57d5a800da365734df ] RISC-V perf driver does not yet support branch sampling. Although the specification is in the works [0], it is best to disable such events until support is available, otherwise we will get unexpected results. Due to this reason, two riscv bpf testcases get_branch_snapshot and perf_branches/perf_branches_hw fail. Link: https://github.com/riscv/riscv-control-transfer-records [0] Fixes: f5bfa23f576f ("RISC-V: Add a perf core library for pmu drivers") Signed-off-by: Pu Lehui Reviewed-by: Atish Patra Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240312012053.1178140-1-pulehui@huaweicloud.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- drivers/perf/riscv_pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 56897d4d4fd3e..2d5cf135e8a1d 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -246,6 +246,10 @@ static int riscv_pmu_event_init(struct perf_event *event) u64 event_config = 0; uint64_t cmask; + /* driver does not support branch stack sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + hwc->flags = 0; mapped_event = rvpmu->event_map(event, &event_config); if (mapped_event < 0) { -- GitLab From 392c47fea7aac184340684774d982607065fca82 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Fri, 22 Mar 2024 16:45:25 +0000 Subject: [PATCH 1547/2290] drm/panfrost: fix power transition timeout warnings [ Upstream commit 2bd02f5a0bac4bb13e0da18652dc75ba0e4958ec ] Increase the timeout value to prevent system logs on Amlogic boards flooding with power transition warnings: [ 13.047638] panfrost ffe40000.gpu: shader power transition timeout [ 13.048674] panfrost ffe40000.gpu: l2 power transition timeout [ 13.937324] panfrost ffe40000.gpu: shader power transition timeout [ 13.938351] panfrost ffe40000.gpu: l2 power transition timeout ... [39829.506904] panfrost ffe40000.gpu: shader power transition timeout [39829.507938] panfrost ffe40000.gpu: l2 power transition timeout [39949.508369] panfrost ffe40000.gpu: shader power transition timeout [39949.509405] panfrost ffe40000.gpu: l2 power transition timeout The 2000 value has been found through trial and error testing with devices using G52 and G31 GPUs. Fixes: 22aa1a209018 ("drm/panfrost: Really power off GPU cores in panfrost_gpu_power_off()") Signed-off-by: Christian Hewitt Reviewed-by: Steven Price Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240322164525.2617508-1-christianshewitt@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panfrost/panfrost_gpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 55d2430485168..40b6314459926 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -379,19 +379,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev) gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "shader power transition timeout"); gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "tiler power transition timeout"); gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, - val, !val, 0, 1000); + val, !val, 0, 2000); if (ret) dev_err(pfdev->dev, "l2 power transition timeout"); } -- GitLab From eb028d1ebd0b410095b79308b82f01ca1598223d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 25 Mar 2024 17:18:12 -0500 Subject: [PATCH 1548/2290] ASoC: rt5682-sdw: fix locking sequence [ Upstream commit 310a5caa4e861616a27a83c3e8bda17d65026fa8 ] The disable_irq_lock protects the 'disable_irq' value, we need to lock before testing it. Fixes: 02fb23d72720 ("ASoC: rt5682-sdw: fix for JD event handling in ClockStop Mode0") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Chao Song Link: https://msgid.link/r/20240325221817.206465-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index 868a61c8b0608..7685011a09354 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -787,12 +787,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt5682->disable_irq_lock); if (rt5682->disable_irq == true) { - mutex_lock(&rt5682->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt5682->disable_irq = false; - mutex_unlock(&rt5682->disable_irq_lock); } + mutex_unlock(&rt5682->disable_irq_lock); goto regmap_sync; } -- GitLab From 044c34fe3531a8061b7c23a40da82e3f2b28f80f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 25 Mar 2024 17:18:13 -0500 Subject: [PATCH 1549/2290] ASoC: rt711-sdca: fix locking sequence [ Upstream commit ee287771644394d071e6a331951ee8079b64f9a7 ] The disable_irq_lock protects the 'disable_irq' value, we need to lock before testing it. Fixes: 23adeb7056ac ("ASoC: rt711-sdca: fix for JD event handling in ClockStop Mode0") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Chao Song Link: https://msgid.link/r/20240325221817.206465-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 487d3010ddc19..931dbc68548ee 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -443,13 +443,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } -- GitLab From 4ff3d8ac62348697accf6d3e661c2d28f929f44a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 25 Mar 2024 17:18:14 -0500 Subject: [PATCH 1550/2290] ASoC: rt711-sdw: fix locking sequence [ Upstream commit aae86cfd8790bcc7693a5a0894df58de5cb5128c ] The disable_irq_lock protects the 'disable_irq' value, we need to lock before testing it. Fixes: b69de265bd0e ("ASoC: rt711: fix for JD event handling in ClockStop Mode0") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Chao Song Link: https://msgid.link/r/20240325221817.206465-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 9545b8a7eb192..af7a0ab5669f4 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -542,12 +542,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } -- GitLab From 7ff957cea8af6af4d6f1079f10aafe721d71f144 Mon Sep 17 00:00:00 2001 From: Stephen Lee Date: Mon, 25 Mar 2024 18:01:31 -0700 Subject: [PATCH 1551/2290] ASoC: ops: Fix wraparound for mask in snd_soc_get_volsw [ Upstream commit fc563aa900659a850e2ada4af26b9d7a3de6c591 ] In snd_soc_info_volsw(), mask is generated by figuring out the index of the most significant bit set in max and converting the index to a bitmask through bit shift 1. Unintended wraparound occurs when max is an integer value with msb bit set. Since the bit shift value 1 is treated as an integer type, the left shift operation will wraparound and set mask to 0 instead of all 1's. In order to fix this, we type cast 1 as `1ULL` to prevent the wraparound. Fixes: 7077148fb50a ("ASoC: core: Split ops out of soc-core.c") Signed-off-by: Stephen Lee Link: https://msgid.link/r/20240326010131.6211-1-slee08177@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 2d25748ca7066..b27e89ff6a167 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, int max = mc->max; int min = mc->min; int sign_bit = mc->sign_bit; - unsigned int mask = (1 << fls(max)) - 1; + unsigned int mask = (1ULL << fls(max)) - 1; unsigned int invert = mc->invert; int val; int ret; -- GitLab From 7171d6aef1f1d96213786443a16d3baca73c7c32 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 15:53:37 +0100 Subject: [PATCH 1552/2290] ata: sata_sx4: fix pdc20621_get_from_dimm() on 64-bit [ Upstream commit 52f80bb181a9a1530ade30bc18991900bbb9697f ] gcc warns about a memcpy() with overlapping pointers because of an incorrect size calculation: In file included from include/linux/string.h:369, from drivers/ata/sata_sx4.c:66: In function 'memcpy_fromio', inlined from 'pdc20621_get_from_dimm.constprop' at drivers/ata/sata_sx4.c:962:2: include/linux/fortify-string.h:97:33: error: '__builtin_memcpy' accessing 4294934464 bytes at offsets 0 and [16, 16400] overlaps 6442385281 bytes at offset -2147450817 [-Werror=restrict] 97 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:620:9: note: in expansion of macro '__underlying_memcpy' 620 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:665:26: note: in expansion of macro '__fortify_memcpy_chk' 665 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ include/asm-generic/io.h:1184:9: note: in expansion of macro 'memcpy' 1184 | memcpy(buffer, __io_virt(addr), size); | ^~~~~~ The problem here is the overflow of an unsigned 32-bit number to a negative that gets converted into a signed 'long', keeping a large positive number. Replace the complex calculation with a more readable min() variant that avoids the warning. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/sata_sx4.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 6ceec59cb2913..fa1966638c060 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -958,8 +958,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_fromio(psource, dimm_mmio + offset / 4, dist); psource += dist; @@ -1006,8 +1005,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_DIMM_WINDOW_CTLR); offset -= (idx * window_size); idx++; - dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_toio(dimm_mmio + offset / 4, psource, dist); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); -- GitLab From 4b87c1bc25593b5915b398d3998b3e3aff4d7dc7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 23:38:06 +0100 Subject: [PATCH 1553/2290] scsi: mylex: Fix sysfs buffer lengths [ Upstream commit 1197c5b2099f716b3de327437fb50900a0b936c9 ] The myrb and myrs drivers use an odd way of implementing their sysfs files, calling snprintf() with a fixed length of 32 bytes to print into a page sized buffer. One of the strings is actually longer than 32 bytes, which clang can warn about: drivers/scsi/myrb.c:1906:10: error: 'snprintf' will always be truncated; specified size is 32, but format string expands to at least 34 [-Werror,-Wformat-truncation] drivers/scsi/myrs.c:1089:10: error: 'snprintf' will always be truncated; specified size is 32, but format string expands to at least 34 [-Werror,-Wformat-truncation] These could all be plain sprintf() without a length as the buffer is always long enough. On the other hand, sysfs files should not be overly long either, so just double the length to make sure the longest strings don't get truncated here. Fixes: 77266186397c ("scsi: myrs: Add Mylex RAID controller (SCSI interface)") Fixes: 081ff398c56c ("scsi: myrb: Add Mylex RAID controller (block interface)") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240326223825.4084412-8-arnd@kernel.org Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/myrb.c | 20 ++++++++++---------- drivers/scsi/myrs.c | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index e885c1dbf61f9..e2f1b186efd00 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrb_devstate_name(ldev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); } else { struct myrb_pdev_state *pdev_info = sdev->hostdata; @@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, else name = myrb_devstate_name(pdev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->state); } return ret; @@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, name = myrb_raidlevel_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } - return snprintf(buf, 32, "Physical Drive\n"); + return snprintf(buf, 64, "Physical Drive\n"); } static DEVICE_ATTR_RO(raid_level); @@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < myrb_logical_channel(sdev->host)) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); status = myrb_get_rbld_progress(cb, &rbld_buf); if (rbld_buf.ldev_num != sdev->id || status != MYRB_STATUS_SUCCESS) - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); - return snprintf(buf, 32, "rebuilding block %u of %u\n", + return snprintf(buf, 64, "rebuilding block %u of %u\n", rbld_buf.ldev_size - rbld_buf.blocks_left, rbld_buf.ldev_size); } diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 7eb8c39da3663..95e7c00cb7e54 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrs_devstate_name(ldev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else { struct myrs_pdev_info *pdev_info; @@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev, pdev_info = sdev->hostdata; name = myrs_devstate_name(pdev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->dev_state); } return ret; @@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev, ldev_info = sdev->hostdata; name = myrs_raid_level_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } static DEVICE_ATTR_RO(raid_level); @@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); ldev_info = sdev->hostdata; ldev_num = ldev_info->ldev_num; @@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev, return -EIO; } if (ldev_info->rbld_active) { - return snprintf(buf, 32, "rebuilding block %zu of %zu\n", + return snprintf(buf, 64, "rebuilding block %zu of %zu\n", (size_t)ldev_info->rbld_lba, (size_t)ldev_info->cfg_devsize); } else - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); } static ssize_t rebuild_store(struct device *dev, @@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev, unsigned short ldev_num; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not checking\n"); + return snprintf(buf, 64, "physical device - not checking\n"); ldev_info = sdev->hostdata; if (!ldev_info) @@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev, ldev_num = ldev_info->ldev_num; myrs_get_ldev_info(cs, ldev_num, ldev_info); if (ldev_info->cc_active) - return snprintf(buf, 32, "checking block %zu of %zu\n", + return snprintf(buf, 64, "checking block %zu of %zu\n", (size_t)ldev_info->cc_lba, (size_t)ldev_info->cfg_devsize); else - return snprintf(buf, 32, "not checking\n"); + return snprintf(buf, 64, "not checking\n"); } static ssize_t consistency_check_store(struct device *dev, -- GitLab From 9adcfd56703cfcdbeb5f076c714594873f02ec72 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 8 Dec 2023 16:23:35 +0800 Subject: [PATCH 1554/2290] scsi: sd: Unregister device if device_add_disk() failed in sd_probe() [ Upstream commit 0296bea01cfa6526be6bd2d16dc83b4e7f1af91f ] "if device_add() succeeds, you should call device_del() when you want to get rid of it." In sd_probe(), device_add_disk() fails when device_add() has already succeeded, so change put_device() to device_unregister() to ensure device resources are released. Fixes: 2a7a891f4c40 ("scsi: sd: Add error handling support for add_disk()") Signed-off-by: Li Nan Link: https://lore.kernel.org/r/20231208082335.1754205-1-linan666@huaweicloud.com Reviewed-by: Bart Van Assche Reviewed-by: Yu Kuai Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c793bca882236..f32236c3f81c6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3636,7 +3636,7 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { - put_device(&sdkp->disk_dev); + device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } -- GitLab From 78942ac754990d200ba5454678e3d8cb2e327d59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Apr 2024 10:11:35 +0100 Subject: [PATCH 1555/2290] cifs: Fix caching to try to do open O_WRONLY as rdwr on server [ Upstream commit e9e62243a3e2322cf639f653a0b0a88a76446ce7 ] When we're engaged in local caching of a cifs filesystem, we cannot perform caching of a partially written cache granule unless we can read the rest of the granule. This can result in unexpected access errors being reported to the user. Fix this by the following: if a file is opened O_WRONLY locally, but the mount was given the "-o fsc" flag, try first opening the remote file with GENERIC_READ|GENERIC_WRITE and if that returns -EACCES, try dropping the GENERIC_READ and doing the open again. If that last succeeds, invalidate the cache for that file as for O_DIRECT. Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/dir.c | 15 +++++++++++++ fs/smb/client/file.c | 48 ++++++++++++++++++++++++++++++++--------- fs/smb/client/fscache.h | 6 ++++++ 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index e382b794acbed..863c7bc3db86f 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -180,6 +180,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; *oplock = 0; if (tcon->ses->server->oplocks) @@ -191,6 +192,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned return PTR_ERR(full_path); } + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & @@ -267,6 +272,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) desired_access |= GENERIC_WRITE; + if (rdwr_for_fscache == 1) + desired_access |= GENERIC_READ; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -295,6 +302,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -308,8 +316,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) { cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access &= ~GENERIC_READ; + rdwr_for_fscache = 2; + goto retry_open; + } goto out; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 0f3405e0f2e48..c240cea7ca349 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -77,12 +77,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) */ } -static inline int cifs_convert_flags(unsigned int flags) +static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) { if ((flags & O_ACCMODE) == O_RDONLY) return GENERIC_READ; else if ((flags & O_ACCMODE) == O_WRONLY) - return GENERIC_WRITE; + return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; else if ((flags & O_ACCMODE) == O_RDWR) { /* GENERIC_ALL is too much permission to request can cause unnecessary access denied on create */ @@ -219,11 +219,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ int create_options = CREATE_NOT_DIR; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; if (!server->ops->open) return -ENOSYS; - desired_access = cifs_convert_flags(f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); /********************************************************************* * open flag mapping table: @@ -260,6 +265,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ if (f_flags & O_DIRECT) create_options |= CREATE_NO_BUFFER; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -271,8 +277,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ }; rc = server->ops->open(xid, &oparms, oplock, buf); - if (rc) + if (rc) { + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } return rc; + } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); /* TODO: Add support for calling posix query info but with passing in fid */ if (tcon->unix_ext) @@ -705,11 +719,11 @@ int cifs_open(struct inode *inode, struct file *file) use_cache: fscache_use_cookie(cifs_inode_cookie(file_inode(file)), file->f_mode & FMODE_WRITE); - if (file->f_flags & O_DIRECT && - (!((file->f_flags & O_ACCMODE) != O_RDONLY) || - file->f_flags & O_APPEND)) - cifs_invalidate_cache(file_inode(file), - FSCACHE_INVAL_DIO_WRITE); + if (!(file->f_flags & O_DIRECT)) + goto out; + if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) + goto out; + cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); out: free_dentry_path(page); @@ -774,6 +788,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -837,7 +852,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - desired_access = cifs_convert_flags(cfile->f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) @@ -849,6 +868,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -874,6 +894,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) /* indicate that we need to relock the file */ oparms.reconnect = true; } + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(cfile->f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } if (rc) { mutex_unlock(&cfile->fh_mutex); @@ -882,6 +907,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) goto reopen_error_exit; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY reopen_success: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index 67b601041f0a3..c691b98b442a6 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -108,6 +108,11 @@ static inline void cifs_readpage_to_fscache(struct inode *inode, __cifs_readpage_to_fscache(inode, page); } +static inline bool cifs_fscache_enabled(struct inode *inode) +{ + return fscache_cookie_enabled(cifs_inode_cookie(inode)); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline void cifs_fscache_fill_coherency(struct inode *inode, @@ -123,6 +128,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} +static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } static inline int cifs_fscache_query_occupancy(struct inode *inode, pgoff_t first, unsigned int nr_pages, -- GitLab From 12059cf0487ff5299f03e3e3bc0f71213c99a3de Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:48 +0200 Subject: [PATCH 1556/2290] ata: sata_mv: Fix PCI device ID table declaration compilation warning [ Upstream commit 3137b83a90646917c90951d66489db466b4ae106 ] Building with W=1 shows a warning for an unused variable when CONFIG_PCI is diabled: drivers/ata/sata_mv.c:790:35: error: unused variable 'mv_pci_tbl' [-Werror,-Wunused-const-variable] static const struct pci_device_id mv_pci_tbl[] = { Move the table into the same block that containsn the pci_driver definition. Fixes: 7bb3c5290ca0 ("sata_mv: Remove PCI dependency") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/sata_mv.c | 63 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 17f9062b0eaa5..9cf540017a5e5 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { }, }; -static const struct pci_device_id mv_pci_tbl[] = { - { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, - { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, - /* RocketRAID 1720/174x have different identifiers */ - { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, - - { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, - { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, - { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, - - { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, - - /* Adaptec 1430SA */ - { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, - - /* Marvell 7042 support */ - { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, - - /* Highpoint RocketRAID PCIe series */ - { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, - { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, - - { } /* terminate list */ -}; - static const struct mv_hw_ops mv5xxx_ops = { .phy_errata = mv5_phy_errata, .enable_leds = mv5_enable_leds, @@ -4301,6 +4270,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, static int mv_pci_device_resume(struct pci_dev *pdev); #endif +static const struct pci_device_id mv_pci_tbl[] = { + { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, + { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, + /* RocketRAID 1720/174x have different identifiers */ + { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, + + { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, + { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, + { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, + + { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, + + /* Adaptec 1430SA */ + { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, + + /* Marvell 7042 support */ + { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, + + /* Highpoint RocketRAID PCIe series */ + { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, + { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, + + { } /* terminate list */ +}; static struct pci_driver mv_pci_driver = { .name = DRV_NAME, @@ -4313,6 +4312,7 @@ static struct pci_driver mv_pci_driver = { #endif }; +MODULE_DEVICE_TABLE(pci, mv_pci_tbl); /** * mv_print_info - Dump key info to kernel log for perusal. @@ -4485,7 +4485,6 @@ static void __exit mv_exit(void) MODULE_AUTHOR("Brett Russ"); MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, mv_pci_tbl); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:" DRV_NAME); -- GitLab From c19715ec258d4be92e0995660d31ae2612439f0b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Apr 2024 13:56:18 -0400 Subject: [PATCH 1557/2290] nfsd: hold a lighter-weight client reference over CB_RECALL_ANY [ Upstream commit 10396f4df8b75ff6ab0aa2cd74296565466f2c8d ] Currently the CB_RECALL_ANY job takes a cl_rpc_users reference to the client. While a callback job is technically an RPC that counter is really more for client-driven RPCs, and this has the effect of preventing the client from being unhashed until the callback completes. If nfsd decides to send a CB_RECALL_ANY just as the client reboots, we can end up in a situation where the callback can't complete on the (now dead) callback channel, but the new client can't connect because the old client can't be unhashed. This usually manifests as a NFS4ERR_DELAY return on the CREATE_SESSION operation. The job is only holding a reference to the client so it can clear a flag after the RPC completes. Fix this by having CB_RECALL_ANY instead hold a reference to the cl_nfsdfs.cl_ref. Typically we only take that sort of reference when dealing with the nfsdfs info files, but it should work appropriately here to ensure that the nfs4_client doesn't disappear. Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition") Reported-by: Vladimir Benes Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4state.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e4522e86e984e..8d15959004ad2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2889,12 +2889,9 @@ static void nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); + drop_client(clp); } static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { @@ -6231,7 +6228,7 @@ deleg_reaper(struct nfsd_net *nn) list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); + kref_get(&clp->cl_nfsdfs.cl_ref); set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); } -- GitLab From 7ef6a7f9b32fdfc8bec0a10e6d5ac5374d4f02e7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 5 Apr 2024 16:46:37 +0200 Subject: [PATCH 1558/2290] x86/retpoline: Add NOENDBR annotation to the SRSO dummy return thunk commit b377c66ae3509ccea596512d6afb4777711c4870 upstream. srso_alias_untrain_ret() is special code, even if it is a dummy which is called in the !SRSO case, so annotate it like its real counterpart, to address the following objtool splat: vmlinux.o: warning: objtool: .export_symbol+0x2b290: data relocation to !ENDBR: srso_alias_untrain_ret+0x0 Fixes: 4535e1a4174c ("x86/bugs: Fix the SRSO mitigation on Zen3/4") Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240405144637.17908-1-bp@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a96e816e5ccd7..055955c9bfcb7 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -261,6 +261,7 @@ SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR ret int3 SYM_CODE_END(__x86_return_thunk) -- GitLab From 2e5f8dc1dec824d9df6d5f5eb9f1b5a73b57574f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Mar 2024 21:58:26 +0900 Subject: [PATCH 1559/2290] ksmbd: don't send oplock break if rename fails commit c1832f67035dc04fb89e6b591b64e4d515843cda upstream. Don't send oplock break if rename fails. This patch fix smb2.oplock.batch20 test. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 39fc078284c8e..c02b1772cb807 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5579,8 +5579,9 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; - smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); + if (!rc) + smb_break_all_levII_oplock(work, fp, 0); out: kfree(new_name); return rc; -- GitLab From 51a6c2af9d20203ddeeaf73314ba8854b38d01bd Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Mar 2024 21:59:10 +0900 Subject: [PATCH 1560/2290] ksmbd: validate payload size in ipc response commit a677ebd8ca2f2632ccdecbad7b87641274e15aac upstream. If installing malicious ksmbd-tools, ksmbd.mountd can return invalid ipc response to ksmbd kernel server. ksmbd should validate payload size of ipc response from ksmbd.mountd to avoid memory overrun or slab-out-of-bounds. This patch validate 3 ipc response that has payload. Cc: stable@vger.kernel.org Reported-by: Chao Ma Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/ksmbd_netlink.h | 3 ++- fs/smb/server/mgmt/share_config.c | 7 +++++- fs/smb/server/transport_ipc.c | 37 +++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 0ebf91ffa2361..4464a62228cf3 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -166,7 +166,8 @@ struct ksmbd_share_config_response { __u16 force_uid; __u16 force_gid; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; - __u32 reserved[112]; /* Reserved room */ + __u32 reserved[111]; /* Reserved room */ + __u32 payload_sz; __u32 veto_list_sz; __s8 ____payload[]; }; diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 328a412259dc1..a2f0a2edceb8a 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->name = kstrdup(name, GFP_KERNEL); if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { - share->path = kstrdup(ksmbd_share_config_path(resp), + int path_len = PATH_MAX; + + if (resp->payload_sz) + path_len = resp->payload_sz - resp->veto_list_sz; + + share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); if (share->path) share->path_sz = strlen(share->path); diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index f29bb03f0dc47..8752ac82c557b 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -65,6 +65,7 @@ struct ipc_msg_table_entry { struct hlist_node ipc_table_hlist; void *response; + unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; @@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) } memcpy(entry->response, payload, sz); + entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; @@ -453,6 +455,34 @@ out: return ret; } +static int ipc_validate_msg(struct ipc_msg_table_entry *entry) +{ + unsigned int msg_sz = entry->msg_sz; + + if (entry->type == KSMBD_EVENT_RPC_REQUEST) { + struct ksmbd_rpc_command *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; + } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { + struct ksmbd_spnego_authen_response *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_spnego_authen_response) + + resp->session_key_len + resp->spnego_blob_len; + } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { + struct ksmbd_share_config_response *resp = entry->response; + + if (resp->payload_sz) { + if (resp->payload_sz < resp->veto_list_sz) + return -EINVAL; + + msg_sz = sizeof(struct ksmbd_share_config_response) + + resp->payload_sz; + } + } + + return entry->msg_sz != msg_sz ? -EINVAL : 0; +} + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; @@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); + if (entry.response) { + ret = ipc_validate_msg(&entry); + if (ret) { + kvfree(entry.response); + entry.response = NULL; + } + } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); -- GitLab From 883e072e83f1f322ebfebbcede7655420c6cf0ab Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 2 Apr 2024 09:31:22 +0900 Subject: [PATCH 1561/2290] ksmbd: do not set SMB2_GLOBAL_CAP_ENCRYPTION for SMB 3.1.1 commit 5ed11af19e56f0434ce0959376d136005745a936 upstream. SMB2_GLOBAL_CAP_ENCRYPTION flag should be used only for 3.0 and 3.0.2 dialects. This flags set cause compatibility problems with other SMB clients. Reported-by: James Christopher Adduono Tested-by: James Christopher Adduono Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2ops.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index 27a9dce3e03ab..8600f32c981a1 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; } @@ -275,11 +280,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_DIRECTORY_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || - (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; -- GitLab From 2ff8f06550575e60b05ea650047ab9a9bfb32d66 Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Thu, 28 Mar 2024 11:27:57 +0100 Subject: [PATCH 1562/2290] ALSA: hda/realtek - Fix inactive headset mic jack commit daf6c4681a74034d5723e2fb761e0d7f3a1ca18f upstream. This patch adds the existing fixup to certain TF platforms implementing the ALC274 codec with a headset jack. It fixes/activates the inactive microphone of the headset. Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: Message-ID: <20240328102757.50310-1-wse@tuxedocomputers.com> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fb12034d464ee..24ab799b7ef13 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10121,6 +10121,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), -- GitLab From f35d7ede62d989b65d37a49ccbfc3b1b06182deb Mon Sep 17 00:00:00 2001 From: I Gede Agastya Darma Laksana Date: Tue, 2 Apr 2024 00:46:02 +0700 Subject: [PATCH 1563/2290] ALSA: hda/realtek: Update Panasonic CF-SZ6 quirk to support headset with microphone commit 1576f263ee2147dc395531476881058609ad3d38 upstream. This patch addresses an issue with the Panasonic CF-SZ6's existing quirk, specifically its headset microphone functionality. Previously, the quirk used ALC269_FIXUP_HEADSET_MODE, which does not support the CF-SZ6's design of a single 3.5mm jack for both mic and audio output effectively. The device uses pin 0x19 for the headset mic without jack detection. Following verification on the CF-SZ6 and discussions with the original patch author, i determined that the update to ALC269_FIXUP_ASPIRE_HEADSET_MIC is the appropriate solution. This change is custom-designed for the CF-SZ6's unique hardware setup, which includes a single 3.5mm jack for both mic and audio output, connecting the headset microphone to pin 0x19 without the use of jack detection. Fixes: 0fca97a29b83 ("ALSA: hda/realtek - Add Panasonic CF-SZ6 headset jack quirk") Signed-off-by: I Gede Agastya Darma Laksana Cc: Message-ID: <20240401174602.14133-1-gedeagas22@gmail.com> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 24ab799b7ef13..e8cf38dc8a5e0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9905,7 +9905,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), + SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), -- GitLab From 9406d598a13ad4e0f13b63d3a2bdbaf30d73af44 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 16:21:25 +0100 Subject: [PATCH 1564/2290] driver core: Introduce device_link_wait_removal() commit 0462c56c290a99a7f03e817ae5b843116dfb575c upstream. The commit 80dd33cf72d1 ("drivers: base: Fix device link removal") introduces a workqueue to release the consumer and supplier devices used in the devlink. In the job queued, devices are release and in turn, when all the references to these devices are dropped, the release function of the device itself is called. Nothing is present to provide some synchronisation with this workqueue in order to ensure that all ongoing releasing operations are done and so, some other operations can be started safely. For instance, in the following sequence: 1) of_platform_depopulate() 2) of_overlay_remove() During the step 1, devices are released and related devlinks are removed (jobs pushed in the workqueue). During the step 2, OF nodes are destroyed but, without any synchronisation with devlink removal jobs, of_overlay_remove() can raise warnings related to missing of_node_put(): ERROR: memory leak, expected refcount 1 instead of 2 Indeed, the missing of_node_put() call is going to be done, too late, from the workqueue job execution. Introduce device_link_wait_removal() to offer a way to synchronize operations waiting for the end of devlink removals (i.e. end of workqueue jobs). Also, as a flushing operation is done on the workqueue, the workqueue used is moved from a system-wide workqueue to a local one. Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Tested-by: Luca Ceresoli Reviewed-by: Nuno Sa Reviewed-by: Saravana Kannan Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240325152140.198219-2-herve.codina@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 26 +++++++++++++++++++++++--- include/linux/device.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3078f44dc1861..8d87808cdb8aa 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -56,6 +56,7 @@ static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; +static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. @@ -585,12 +586,26 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. + * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. + */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + static struct class devlink_class = { .name = "devlink", .owner = THIS_MODULE, @@ -4132,9 +4147,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: diff --git a/include/linux/device.h b/include/linux/device.h index 5520bb546a4ac..f88b498ee9da4 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1099,6 +1099,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); extern __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); -- GitLab From 7b6df050c45a1ea158fd50bc32a8e1447dd1e951 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 16:21:26 +0100 Subject: [PATCH 1565/2290] of: dynamic: Synchronize of_changeset_destroy() with the devlink removals commit 8917e7385346bd6584890ed362985c219fe6ae84 upstream. In the following sequence: 1) of_platform_depopulate() 2) of_overlay_remove() During the step 1, devices are destroyed and devlinks are removed. During the step 2, OF nodes are destroyed but __of_changeset_entry_destroy() can raise warnings related to missing of_node_put(): ERROR: memory leak, expected refcount 1 instead of 2 ... Indeed, during the devlink removals performed at step 1, the removal itself releasing the device (and the attached of_node) is done by a job queued in a workqueue and so, it is done asynchronously with respect to function calls. When the warning is present, of_node_put() will be called but wrongly too late from the workqueue job. In order to be sure that any ongoing devlink removals are done before the of_node destruction, synchronize the of_changeset_destroy() with the devlink removals. Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Reviewed-by: Saravana Kannan Tested-by: Luca Ceresoli Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240325152140.198219-3-herve.codina@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/dynamic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 9bb9fe0fad07c..e2a9651014c6e 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "OF: " fmt +#include #include #include #include @@ -679,6 +680,17 @@ void of_changeset_destroy(struct of_changeset *ocs) { struct of_changeset_entry *ce, *cen; + /* + * When a device is deleted, the device links to/from it are also queued + * for deletion. Until these device links are freed, the devices + * themselves aren't freed. If the device being deleted is due to an + * overlay change, this device might be holding a reference to a device + * node that will be freed. So, wait until all already pending device + * links are deleted before freeing a device node. This ensures we don't + * free any device node that has a non-zero reference count. + */ + device_link_wait_removal(); + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) __of_changeset_entry_destroy(ce); } -- GitLab From 97e93367e82752e475a33839a80b33bdbef1209f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 3 Apr 2024 23:21:30 +0200 Subject: [PATCH 1566/2290] x86/mm/pat: fix VM_PAT handling in COW mappings commit 04c35ab3bdae7fefbd7c7a7355f29fa03a035221 upstream. PAT handling won't do the right thing in COW mappings: the first PTE (or, in fact, all PTEs) can be replaced during write faults to point at anon folios. Reliably recovering the correct PFN and cachemode using follow_phys() from PTEs will not work in COW mappings. Using follow_phys(), we might just get the address+protection of the anon folio (which is very wrong), or fail on swap/nonswap entries, failing follow_phys() and triggering a WARN_ON_ONCE() in untrack_pfn() and track_pfn_copy(), not properly calling free_pfn_range(). In free_pfn_range(), we either wouldn't call memtype_free() or would call it with the wrong range, possibly leaking memory. To fix that, let's update follow_phys() to refuse returning anon folios, and fallback to using the stored PFN inside vma->vm_pgoff for COW mappings if we run into that. We will now properly handle untrack_pfn() with COW mappings, where we don't need the cachemode. We'll have to fail fork()->track_pfn_copy() if the first page was replaced by an anon folio, though: we'd have to store the cachemode in the VMA to make this work, likely growing the VMA size. For now, lets keep it simple and let track_pfn_copy() just fail in that case: it would have failed in the past with swap/nonswap entries already, and it would have done the wrong thing with anon folios. Simple reproducer to trigger the WARN_ON_ONCE() in untrack_pfn(): <--- C reproducer ---> #include #include #include #include int main(void) { struct io_uring_params p = {}; int ring_fd; size_t size; char *map; ring_fd = io_uring_setup(1, &p); if (ring_fd < 0) { perror("io_uring_setup"); return 1; } size = p.sq_off.array + p.sq_entries * sizeof(unsigned); /* Map the submission queue ring MAP_PRIVATE */ map = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, ring_fd, IORING_OFF_SQ_RING); if (map == MAP_FAILED) { perror("mmap"); return 1; } /* We have at least one page. Let's COW it. */ *map = 0; pause(); return 0; } <--- C reproducer ---> On a system with 16 GiB RAM and swap configured: # ./iouring & # memhog 16G # killall iouring [ 301.552930] ------------[ cut here ]------------ [ 301.553285] WARNING: CPU: 7 PID: 1402 at arch/x86/mm/pat/memtype.c:1060 untrack_pfn+0xf4/0x100 [ 301.553989] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_g [ 301.558232] CPU: 7 PID: 1402 Comm: iouring Not tainted 6.7.5-100.fc38.x86_64 #1 [ 301.558772] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebu4 [ 301.559569] RIP: 0010:untrack_pfn+0xf4/0x100 [ 301.559893] Code: 75 c4 eb cf 48 8b 43 10 8b a8 e8 00 00 00 3b 6b 28 74 b8 48 8b 7b 30 e8 ea 1a f7 000 [ 301.561189] RSP: 0018:ffffba2c0377fab8 EFLAGS: 00010282 [ 301.561590] RAX: 00000000ffffffea RBX: ffff9208c8ce9cc0 RCX: 000000010455e047 [ 301.562105] RDX: 07fffffff0eb1e0a RSI: 0000000000000000 RDI: ffff9208c391d200 [ 301.562628] RBP: 0000000000000000 R08: ffffba2c0377fab8 R09: 0000000000000000 [ 301.563145] R10: ffff9208d2292d50 R11: 0000000000000002 R12: 00007fea890e0000 [ 301.563669] R13: 0000000000000000 R14: ffffba2c0377fc08 R15: 0000000000000000 [ 301.564186] FS: 0000000000000000(0000) GS:ffff920c2fbc0000(0000) knlGS:0000000000000000 [ 301.564773] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 301.565197] CR2: 00007fea88ee8a20 CR3: 00000001033a8000 CR4: 0000000000750ef0 [ 301.565725] PKRU: 55555554 [ 301.565944] Call Trace: [ 301.566148] [ 301.566325] ? untrack_pfn+0xf4/0x100 [ 301.566618] ? __warn+0x81/0x130 [ 301.566876] ? untrack_pfn+0xf4/0x100 [ 301.567163] ? report_bug+0x171/0x1a0 [ 301.567466] ? handle_bug+0x3c/0x80 [ 301.567743] ? exc_invalid_op+0x17/0x70 [ 301.568038] ? asm_exc_invalid_op+0x1a/0x20 [ 301.568363] ? untrack_pfn+0xf4/0x100 [ 301.568660] ? untrack_pfn+0x65/0x100 [ 301.568947] unmap_single_vma+0xa6/0xe0 [ 301.569247] unmap_vmas+0xb5/0x190 [ 301.569532] exit_mmap+0xec/0x340 [ 301.569801] __mmput+0x3e/0x130 [ 301.570051] do_exit+0x305/0xaf0 ... Link: https://lkml.kernel.org/r/20240403212131.929421-3-david@redhat.com Signed-off-by: David Hildenbrand Reported-by: Wupeng Ma Closes: https://lkml.kernel.org/r/20240227122814.3781907-1-mawupeng1@huawei.com Fixes: b1a86e15dc03 ("x86, pat: remove the dependency on 'vm_pgoff' in track/untrack pfn vma routines") Fixes: 5899329b1910 ("x86: PAT: implement track/untrack of pfnmap regions for x86 - v3") Acked-by: Ingo Molnar Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pat/memtype.c | 49 ++++++++++++++++++++++++++++----------- mm/memory.c | 4 ++++ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 66a209f7eb86d..d6fe9093ea919 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -997,6 +997,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -1007,20 +1039,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1095,7 +1120,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1103,11 +1127,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/mm/memory.c b/mm/memory.c index fb83cf56377ab..301c74c444385 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5593,6 +5593,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; + /* Never return PFNs of anon folios in COW mappings. */ + if (vm_normal_folio(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; -- GitLab From 20a915154ccb88da08986ab6c9fc4c1cf6259de2 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 13 Mar 2024 14:48:27 +0100 Subject: [PATCH 1567/2290] x86/mce: Make sure to grab mce_sysfs_mutex in set_bank() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3ddf944b32f88741c303f0b21459dbb3872b8bc5 upstream. Modifying a MCA bank's MCA_CTL bits which control which error types to be reported is done over /sys/devices/system/machinecheck/ ├── machinecheck0 │   ├── bank0 │   ├── bank1 │   ├── bank10 │   ├── bank11 ... sysfs nodes by writing the new bit mask of events to enable. When the write is accepted, the kernel deletes all current timers and reinits all banks. Doing that in parallel can lead to initializing a timer which is already armed and in the timer wheel, i.e., in use already: ODEBUG: init active (active state 0) object: ffff888063a28000 object type: timer_list hint: mce_timer_fn+0x0/0x240 arch/x86/kernel/cpu/mce/core.c:2642 WARNING: CPU: 0 PID: 8120 at lib/debugobjects.c:514 debug_print_object+0x1a0/0x2a0 lib/debugobjects.c:514 Fix that by grabbing the sysfs mutex as the rest of the MCA sysfs code does. Reported by: Yue Sun Reported by: xingwei lee Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/CAEkJfYNiENwQY8yV1LYJ9LjJs%2Bx_-PqMv98gKig55=2vbzffRw@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mce/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index cad6ea1911e9b..359218bc1b34b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2471,12 +2471,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } -- GitLab From 22943e4fe4b3a2dcbadc3d38d5bf840bbdbfe374 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 26 Mar 2024 17:07:35 +0100 Subject: [PATCH 1568/2290] x86/coco: Require seeding RNG with RDRAND on CoCo systems commit 99485c4c026f024e7cb82da84c7951dbe3deb584 upstream. There are few uses of CoCo that don't rely on working cryptography and hence a working RNG. Unfortunately, the CoCo threat model means that the VM host cannot be trusted and may actively work against guests to extract secrets or manipulate computation. Since a malicious host can modify or observe nearly all inputs to guests, the only remaining source of entropy for CoCo guests is RDRAND. If RDRAND is broken -- due to CPU hardware fault -- the RNG as a whole is meant to gracefully continue on gathering entropy from other sources, but since there aren't other sources on CoCo, this is catastrophic. This is mostly a concern at boot time when initially seeding the RNG, as after that the consequences of a broken RDRAND are much more theoretical. So, try at boot to seed the RNG using 256 bits of RDRAND output. If this fails, panic(). This will also trigger if the system is booted without RDRAND, as RDRAND is essential for a safe CoCo boot. Add this deliberately to be "just a CoCo x86 driver feature" and not part of the RNG itself. Many device drivers and platforms have some desire to contribute something to the RNG, and add_device_randomness() is specifically meant for this purpose. Any driver can call it with seed data of any quality, or even garbage quality, and it can only possibly make the quality of the RNG better or have no effect, but can never make it worse. Rather than trying to build something into the core of the RNG, consider the particular CoCo issue just a CoCo issue, and therefore separate it all out into driver (well, arch/platform) code. [ bp: Massage commit message. ] Signed-off-by: Jason A. Donenfeld Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Elena Reshetova Reviewed-by: Kirill A. Shutemov Reviewed-by: Theodore Ts'o Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240326160735.73531-1-Jason@zx2c4.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 41 +++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/coco.h | 2 ++ arch/x86/kernel/setup.c | 2 ++ 3 files changed, 45 insertions(+) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 1d3ad275c3664..801e943fd2b29 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,13 +3,17 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld . All Rights Reserved. * * Author: Tom Lendacky */ #include #include +#include +#include +#include #include #include @@ -128,3 +132,40 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. + */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 60bb26097da1a..1f97d00ad8588 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -23,6 +23,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else static inline u64 cc_mkenc(u64 val) { @@ -33,6 +34,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d1ffac9ad611d..18a034613d94d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1132,6 +1133,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ sev_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); -- GitLab From 447d844a3e10e3751f9a2ba4965d6f48b909671c Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Tue, 26 Mar 2024 18:12:13 +0100 Subject: [PATCH 1569/2290] s390/entry: align system call table on 8 bytes commit 378ca2d2ad410a1cd5690d06b46c5e2297f4c8c0 upstream. Align system call table on 8 bytes. With sys_call_table entry size of 8 bytes that eliminates the possibility of a system call pointer crossing cache line boundary. Cc: stable@kernel.org Suggested-by: Ulrich Weigand Reviewed-by: Alexander Gordeev Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d2a1f2f4f5b88..c9799dec92793 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -699,6 +699,7 @@ ENDPROC(stack_overflow) .Lthis_cpu: .short 0 .Lstosm_tmp: .byte 0 .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame .globl sys_call_table sys_call_table: -- GitLab From c88f7a709512b7ef384ffae20c26743f4f33d904 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 11 Mar 2024 19:19:13 -0700 Subject: [PATCH 1570/2290] riscv: Fix spurious errors from __get/put_kernel_nofault commit d080a08b06b6266cc3e0e86c5acfd80db937cb6b upstream. These macros did not initialize __kr_err, so they could fail even if the access did not fault. Cc: stable@vger.kernel.org Fixes: d464118cdc41 ("riscv: implement __get_kernel_nofault and __put_user_nofault") Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240312022030.320789-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index ec0cab9fbddd0..72ec1d9bd3f31 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -328,7 +328,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ -- GitLab From f6583444d7e78dae750798552b65a2519ff3ca84 Mon Sep 17 00:00:00 2001 From: Stefan O'Rear Date: Wed, 27 Mar 2024 02:12:58 -0400 Subject: [PATCH 1571/2290] riscv: process: Fix kernel gp leakage commit d14fa1fcf69db9d070e75f1c4425211fa619dfc8 upstream. childregs represents the registers which are active for the new thread in user context. For a kernel thread, childregs->gp is never used since the kernel gp is not touched by switch_to. For a user mode helper, the gp value can be observed in user space after execve or possibly by other means. [From the email thread] The /* Kernel thread */ comment is somewhat inaccurate in that it is also used for user_mode_helper threads, which exec a user process, e.g. /sbin/init or when /proc/sys/kernel/core_pattern is a pipe. Such threads do not have PF_KTHREAD set and are valid targets for ptrace etc. even before they exec. childregs is the *user* context during syscall execution and it is observable from userspace in at least five ways: 1. kernel_execve does not currently clear integer registers, so the starting register state for PID 1 and other user processes started by the kernel has sp = user stack, gp = kernel __global_pointer$, all other integer registers zeroed by the memset in the patch comment. This is a bug in its own right, but I'm unwilling to bet that it is the only way to exploit the issue addressed by this patch. 2. ptrace(PTRACE_GETREGSET): you can PTRACE_ATTACH to a user_mode_helper thread before it execs, but ptrace requires SIGSTOP to be delivered which can only happen at user/kernel boundaries. 3. /proc/*/task/*/syscall: this is perfectly happy to read pt_regs for user_mode_helpers before the exec completes, but gp is not one of the registers it returns. 4. PERF_SAMPLE_REGS_USER: LOCKDOWN_PERF normally prevents access to kernel addresses via PERF_SAMPLE_REGS_INTR, but due to this bug kernel addresses are also exposed via PERF_SAMPLE_REGS_USER which is permitted under LOCKDOWN_PERF. I have not attempted to write exploit code. 5. Much of the tracing infrastructure allows access to user registers. I have not attempted to determine which forms of tracing allow access to user registers without already allowing access to kernel registers. Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Cc: stable@vger.kernel.org Signed-off-by: Stefan O'Rear Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327061258.2370291-1-sorear@fastmail.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/process.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 8955f2432c2d8..6906cc0e57875 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -25,8 +25,6 @@ #include #include -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include unsigned long __stack_chk_guard __read_mostly; @@ -170,7 +168,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; -- GitLab From 8c99dfb49bdc17edffc7ff3d46b400c8c291686c Mon Sep 17 00:00:00 2001 From: Ritvik Budhiraja Date: Tue, 2 Apr 2024 14:01:28 -0500 Subject: [PATCH 1572/2290] smb3: retrying on failed server close commit 173217bd73365867378b5e75a86f0049e1069ee8 upstream. In the current implementation, CIFS close sends a close to the server and does not check for the success of the server close. This patch adds functionality to check for server close return status and retries in case of an EBUSY or EAGAIN error. This can help avoid handle leaks Cc: stable@vger.kernel.org Signed-off-by: Ritvik Budhiraja Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 6 ++-- fs/smb/client/cifsfs.c | 11 +++++++ fs/smb/client/cifsglob.h | 7 +++-- fs/smb/client/file.c | 63 ++++++++++++++++++++++++++++++++++---- fs/smb/client/smb1ops.c | 4 +-- fs/smb/client/smb2ops.c | 9 +++--- fs/smb/client/smb2pdu.c | 2 +- 7 files changed, 85 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 86fe433b1d324..f4ad343b06c1f 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -370,6 +370,7 @@ smb2_close_cached_fid(struct kref *ref) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); + int rc; spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->on_list) { @@ -383,9 +384,10 @@ smb2_close_cached_fid(struct kref *ref) cfid->dentry = NULL; if (cfid->is_open) { - SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, + rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc != -EBUSY && rc != -EAGAIN) + atomic_dec(&cfid->tcon->num_remote_opens); } free_cached_dir(cfid); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 7286a56aebfa9..0a79771c8f33b 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -154,6 +154,7 @@ struct workqueue_struct *decrypt_wq; struct workqueue_struct *fileinfo_put_wq; struct workqueue_struct *cifsoplockd_wq; struct workqueue_struct *deferredclose_wq; +struct workqueue_struct *serverclose_wq; __u32 cifs_lock_secret; /* @@ -1866,6 +1867,13 @@ init_cifs(void) goto out_destroy_cifsoplockd_wq; } + serverclose_wq = alloc_workqueue("serverclose", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!serverclose_wq) { + rc = -ENOMEM; + goto out_destroy_serverclose_wq; + } + rc = cifs_init_inodecache(); if (rc) goto out_destroy_deferredclose_wq; @@ -1940,6 +1948,8 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; @@ -1969,6 +1979,7 @@ exit_cifs(void) destroy_workqueue(cifsoplockd_wq); destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); + destroy_workqueue(serverclose_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 58bb54994e22a..5cabf144e485b 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -389,10 +389,10 @@ struct smb_version_operations { /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ - void (*close)(const unsigned int, struct cifs_tcon *, + int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); @@ -1359,6 +1359,7 @@ struct cifsFileInfo { bool invalidHandle:1; /* file closed via session abend */ bool swapfile:1; bool oplock_break_cancelled:1; + bool offload:1; /* offload final part of _put to a wq */ unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; @@ -1367,6 +1368,7 @@ struct cifsFileInfo { struct cifs_search_info srch_inf; struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ + struct work_struct serverclose; /* work for serverclose */ struct delayed_work deferred; bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ char *symlink_target; @@ -2005,6 +2007,7 @@ extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; +extern struct workqueue_struct *serverclose_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index c240cea7ca349..d23dfc83de507 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -330,6 +330,7 @@ cifs_down_write(struct rw_semaphore *sem) } static void cifsFileInfo_put_work(struct work_struct *work); +void serverclose_work(struct work_struct *work); struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock, @@ -376,6 +377,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_WORK(&cfile->put, cifsFileInfo_put_work); + INIT_WORK(&cfile->serverclose, serverclose_work); INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); @@ -467,6 +469,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) cifsFileInfo_put_final(cifs_file); } +void serverclose_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, serverclose); + + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + + struct TCP_Server_Info *server = tcon->ses->server; + int rc = 0; + int retries = 0; + int MAX_RETRIES = 4; + + do { + if (server->ops->close_getattr) + rc = server->ops->close_getattr(0, tcon, cifs_file); + else if (server->ops->close) + rc = server->ops->close(0, tcon, &cifs_file->fid); + + if (rc == -EBUSY || rc == -EAGAIN) { + retries++; + msleep(250); + } + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) + ); + + if (retries == MAX_RETRIES) + pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); + + if (cifs_file->offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); +} + /** * cifsFileInfo_put - release a reference of file priv data * @@ -507,10 +543,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, struct cifs_fid fid = {}; struct cifs_pending_open open; bool oplock_break_cancelled; + bool serverclose_offloaded = false; spin_lock(&tcon->open_file_lock); spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); + + cifs_file->offload = offload; if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); spin_unlock(&cifsi->open_file_lock); @@ -552,13 +591,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); if (server->ops->close_getattr) - server->ops->close_getattr(xid, tcon, cifs_file); + rc = server->ops->close_getattr(xid, tcon, cifs_file); else if (server->ops->close) - server->ops->close(xid, tcon, &cifs_file->fid); + rc = server->ops->close(xid, tcon, &cifs_file->fid); _free_xid(xid); + + if (rc == -EBUSY || rc == -EAGAIN) { + // Server close failed, hence offloading it as an async op + queue_work(serverclose_wq, &cifs_file->serverclose); + serverclose_offloaded = true; + } } if (oplock_break_cancelled) @@ -566,10 +612,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, cifs_del_pending_open(&open); - if (offload) - queue_work(fileinfo_put_wq, &cifs_file->put); - else - cifsFileInfo_put_final(cifs_file); + // if serverclose has been offloaded to wq (on failure), it will + // handle offloading put as well. If serverclose not offloaded, + // we need to handle offloading put here. + if (!serverclose_offloaded) { + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); + } } int cifs_open(struct inode *inode, struct file *file) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 7d1b3fc014d94..d4045925f8577 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -750,11 +750,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } -static void +static int cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - CIFSSMBClose(xid, tcon, fid->netfid); + return CIFSSMBClose(xid, tcon, fid->netfid); } static int diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 34d1262004dfb..3c471dc90659b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1392,14 +1392,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) memcpy(cfile->fid.create_guid, fid->create_guid, 16); } -static void +static int smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } -static void +static int smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile) { @@ -1410,7 +1410,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, &file_inf); if (rc) - return; + return rc; inode = d_inode(cfile->dentry); @@ -1436,6 +1436,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, /* End of file and Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); + return rc; } static int diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4c1231496a725..cc425a616899a 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3452,9 +3452,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, memcpy(&pbuf->network_open_info, &rsp->network_open_info, sizeof(pbuf->network_open_info)); + atomic_dec(&tcon->num_remote_opens); } - atomic_dec(&tcon->num_remote_opens); close_exit: SMB2_close_free(&rqst); free_rsp_buf(resp_buftype, rsp); -- GitLab From 229042314602db62559ecacba127067c22ee7b88 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:53 -0300 Subject: [PATCH 1573/2290] smb: client: fix potential UAF in cifs_debug_files_proc_show() commit ca545b7f0823f19db0f1148d59bc5e1a56634502 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 2 ++ fs/smb/client/cifsglob.h | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 5df8d93233376..fa45b3e7efe8f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -186,6 +186,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 5cabf144e485b..e5a72f9c793ef 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2178,4 +2178,14 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, return sg; } +static inline bool cifs_ses_exiting(struct cifs_ses *ses) +{ + bool ret; + + spin_lock(&ses->ses_lock); + ret = ses->ses_status == SES_EXITING; + spin_unlock(&ses->ses_lock); + return ret; +} + #endif /* _CIFS_GLOB_H */ -- GitLab From 8fefd166fcb368c5fcf48238e3f7c8af829e0a72 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:55 -0300 Subject: [PATCH 1574/2290] smb: client: fix potential UAF in cifs_stats_proc_write() commit d3da25c5ac84430f89875ca7485a3828150a7e0a upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index fa45b3e7efe8f..6a9320a747e22 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -568,6 +568,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, } #endif /* CONFIG_CIFS_STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { atomic_set(&tcon->num_smbs_sent, 0); spin_lock(&tcon->stat_lock); -- GitLab From 16b7d785775eb03929766819415055e367398f49 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:56 -0300 Subject: [PATCH 1575/2290] smb: client: fix potential UAF in cifs_stats_proc_show() commit 0865ffefea197b437ba78b5dd8d8e256253efd65 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 6a9320a747e22..a2afdf9c5f80b 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -648,6 +648,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) } #endif /* STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { i++; seq_printf(m, "\n%d) %s", i, tcon->tree_name); -- GitLab From 84488466b7a69570bdbf76dd9576847ab97d54e7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:59 -0300 Subject: [PATCH 1576/2290] smb: client: fix potential UAF in smb2_is_valid_oplock_break() commit 22863485a4626ec6ecf297f4cc0aef709bc862e4 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 15fa022e79993..4d2e8b390e106 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -697,6 +697,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); -- GitLab From c868cabdf6fdd61bea54532271f4708254e57fc5 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:58 -0300 Subject: [PATCH 1577/2290] smb: client: fix potential UAF in smb2_is_valid_lease_break() commit 705c76fbf726c7a2f6ff9143d4013b18daaaebf1 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 4d2e8b390e106..8c149cb531d3f 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); cifs_stats_inc( -- GitLab From 494c91e1e9413b407d12166a61b84200d4d54fac Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:34:00 -0300 Subject: [PATCH 1578/2290] smb: client: fix potential UAF in is_valid_oplock_break() commit 69ccf040acddf33a3a85ec0f6b45ef84b0f7ec29 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 41290c12d0bcc..3826f71766086 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -476,6 +476,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; -- GitLab From f9414004798d9742c1af23a1d839fe6a9503751c Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:34:02 -0300 Subject: [PATCH 1579/2290] smb: client: fix potential UAF in smb2_is_network_name_deleted() commit 63981561ffd2d4987807df4126f96a11e18b0c1d upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3c471dc90659b..2291081653a85 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2437,6 +2437,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); -- GitLab From 7e8360ac8774e19b0b25f44fff84a105bb2417e4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:34:04 -0300 Subject: [PATCH 1580/2290] smb: client: fix potential UAF in cifs_signal_cifsd_for_reconnect() commit e0e50401cc3921c9eaf1b0e667db174519ea939f upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/connect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 49fdc6dfdcf8d..8c2a784200ec2 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -216,6 +216,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { spin_lock(&ses->chans[i].server->srv_lock); -- GitLab From 08ef93ebc73c12d8c6249b28bf729924e8ff9142 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 29 Mar 2024 13:08:53 +0100 Subject: [PATCH 1581/2290] selftests: mptcp: join: fix dev in check_endpoint commit 40061817d95bce6dd5634a61a65cd5922e6ccc92 upstream. There's a bug in pm_nl_check_endpoint(), 'dev' didn't be parsed correctly. If calling it in the 2nd test of endpoint_tests() too, it fails with an error like this: creation [FAIL] expected '10.0.2.2 id 2 subflow dev dev' \ found '10.0.2.2 id 2 subflow dev ns2eth2' The reason is '$2' should be set to 'dev', not '$1'. This patch fixes it. Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240329-upstream-net-20240329-fallback-mib-v1-2-324a8981da48@kernel.org Signed-off-by: Jakub Kicinski [ Conflicts in mptcp_join.sh: only the fix has been added, not the verification because this modified subtest is quite different in v6.1: to add this verification, we would need to change a bit the subtest: pm_nl_check_endpoint() takes an extra argument for the title, the next chk_subflow_nr() will no longer need the title, etc. Easier with only the fix without the extra test. ] Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a20dca9d26d68..0b433606a298d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -725,7 +725,7 @@ pm_nl_check_endpoint() [ -n "$_flags" ]; flags="flags $_flags" shift elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $1" + [ -n "$2" ]; dev="dev $2" shift elif [ $1 = "id" ]; then _id=$2 -- GitLab From d1fefedc1a0e31cc8001f7702a2c51d07343a499 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 29 Mar 2024 13:08:52 +0100 Subject: [PATCH 1582/2290] mptcp: don't account accept() of non-MPC client as fallback to TCP commit 7a1b3490f47e88ec4cbde65f1a77a0f4bc972282 upstream. Current MPTCP servers increment MPTcpExtMPCapableFallbackACK when they accept non-MPC connections. As reported by Christoph, this is "surprising" because the counter might become greater than MPTcpExtMPCapableSYNRX. MPTcpExtMPCapableFallbackACK counter's name suggests it should only be incremented when a connection was seen using MPTCP options, then a fallback to TCP has been done. Let's do that by incrementing it when the subflow context of an inbound MPC connection attempt is dropped. Also, update mptcp_connect.sh kselftest, to ensure that the above MIB does not increment in case a pure TCP client connects to a MPTCP server. Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/449 Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240329-upstream-net-20240329-fallback-mib-v1-1-324a8981da48@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 3 --- net/mptcp/subflow.c | 2 ++ tools/testing/selftests/net/mptcp/mptcp_connect.sh | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3bc21581486ae..c652c8ca765c2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3349,9 +3349,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - } else { - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } out: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 891c2f4fed080..f1d422396b28b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -816,6 +816,8 @@ dispose_child: return child; fallback: + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 18c9b00ca058e..dacf4cf2246da 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -439,6 +439,7 @@ do_transfer() local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_tcpfb_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -504,6 +505,7 @@ do_transfer() local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_tcpfb_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -548,6 +550,11 @@ do_transfer() fi fi + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + printf "[ FAIL ]\nunexpected fallback to TCP" + rets=1 + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then printf "[ OK ]" fi -- GitLab From a6dc534c073b529dfd39683f63dd87ab30261f4b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:11 -0700 Subject: [PATCH 1583/2290] selftests: mptcp: display simult in extra_msg commit 629b35a225b0d49fbcff3b5c22e3b983c7c7b36f upstream. Just like displaying "invert" after "Info: ", "simult" should be displayed too when rm_subflow_nr doesn't match the expect value in chk_rm_nr(): syn [ ok ] synack [ ok ] ack [ ok ] add [ ok ] echo [ ok ] rm [ ok ] rmsf [ ok ] 3 in [2:4] Info: invert simult syn [ ok ] synack [ ok ] ack [ ok ] add [ ok ] echo [ ok ] rm [ ok ] rmsf [ ok ] Info: invert Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-10-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0b433606a298d..635a1624b47dc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1771,7 +1771,10 @@ chk_rm_nr() # in case of simult flush, the subflow removal count on each side is # unreliable count=$((count + cnt)) - [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + if [ "$count" != "$rm_subflow_nr" ]; then + suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + extra_msg="$extra_msg simult" + fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then echo -n "[ ok ] $suffix" -- GitLab From 9c2b4b657739ecda38e3b383354a29566955ac48 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 26 Mar 2024 15:32:08 +0100 Subject: [PATCH 1584/2290] mm/secretmem: fix GUP-fast succeeding on secretmem folios commit 65291dcfcf8936e1b23cfd7718fdfde7cfaf7706 upstream. folio_is_secretmem() currently relies on secretmem folios being LRU folios, to save some cycles. However, folios might reside in a folio batch without the LRU flag set, or temporarily have their LRU flag cleared. Consequently, the LRU flag is unreliable for this purpose. In particular, this is the case when secretmem_fault() allocates a fresh page and calls filemap_add_folio()->folio_add_lru(). The folio might be added to the per-cpu folio batch and won't get the LRU flag set until the batch was drained using e.g., lru_add_drain(). Consequently, folio_is_secretmem() might not detect secretmem folios and GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel when we would later try reading/writing to the folio, because the folio has been unmapped from the directmap. Fix it by removing that unreliable check. Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: David Hildenbrand Reported-by: xingwei lee Reported-by: yue sun Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/ Debugged-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Mike Rapoport (IBM) Cc: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton Signed-off-by: David Hildenbrand Signed-off-by: Greg Kroah-Hartman --- include/linux/secretmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 988528b5da438..48ffe325184c0 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -14,10 +14,10 @@ static inline bool page_is_secretmem(struct page *page) * Using page_mapping() is quite slow because of the actual call * instruction and repeated compound_head(page) inside the * page_mapping() function. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (PageCompound(page) || !PageLRU(page)) + if (PageCompound(page)) return false; mapping = (struct address_space *) -- GitLab From 5e6898b8544dbb52755cee40c0c360a4bfde69e6 Mon Sep 17 00:00:00 2001 From: "min15.li" Date: Fri, 26 May 2023 17:06:56 +0000 Subject: [PATCH 1585/2290] nvme: fix miss command type check commit 31a5978243d24d77be4bacca56c78a0fbc43b00d upstream. In the function nvme_passthru_end(), only the value of the command opcode is checked, without checking the command type (IO command or Admin command). When we send a Dataset Management command (The opcode of the Dataset Management command is the same as the Set Feature command), kernel thinks it is a set feature command, then sets the controller's keep alive interval, and calls nvme_keep_alive_work(). Signed-off-by: min15.li Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Fixes: b58da2d270db ("nvme: update keep alive interval when kato is modified") Signed-off-by: Tokunori Ikegami Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 4 +++- drivers/nvme/host/ioctl.c | 3 ++- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/target/passthru.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d7516e99275b6..20160683e8685 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1151,7 +1151,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { @@ -1167,6 +1167,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); } + if (ns) + return; switch (cmd->common.opcode) { case nvme_admin_set_features: diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 91e6d03475798..b3e322e4ade38 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -147,6 +147,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, bool vec) { + struct nvme_ns *ns = q->queuedata; struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; @@ -181,7 +182,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, blk_mq_free_request(req); if (effects) - nvme_passthru_end(ctrl, effects, cmd, ret); + nvme_passthru_end(ctrl, ns, effects, cmd, ret); return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a892d679e3389..8e28d2de45c0e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1063,7 +1063,7 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); int nvme_execute_passthru_rq(struct request *rq, u32 *effects); -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index adc0958755d66..a0a292d49588c 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -216,6 +216,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + struct nvme_ns *ns = rq->q->queuedata; u32 effects; int status; @@ -242,7 +243,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) blk_mq_free_request(rq); if (effects) - nvme_passthru_end(ctrl, effects, req->cmd, status); + nvme_passthru_end(ctrl, ns, effects, req->cmd, status); } static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq, -- GitLab From fd52c0397b53ebcd4931981b3bc38f3b760b74df Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Apr 2024 11:14:13 -0700 Subject: [PATCH 1586/2290] x86/bugs: Change commas to semicolons in 'spectre_v2' sysfs file commit 0cd01ac5dcb1e18eb18df0f0d05b5de76522a437 upstream. Change the format of the 'spectre_v2' vulnerabilities sysfs file slightly by converting the commas to semicolons, so that mitigations for future variants can be grouped together and separated by commas. Signed-off-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e3fec47a800bf..4a1b4dee1425f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2674,15 +2674,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2691,10 +2691,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2704,11 +2704,11 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } @@ -2727,9 +2727,9 @@ static ssize_t spectre_v2_show_state(char *buf) return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), spectre_v2_module_string()); } -- GitLab From 74fcb181772e5b8a8f1244c7393c56ae6d03c330 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Apr 2024 16:36:44 -0700 Subject: [PATCH 1587/2290] x86/syscall: Don't force use of indirect calls for system calls commit 1e3ad78334a69b36e107232e337f9d693dcc9df2 upstream. Make build a switch statement instead, and the compiler can either decide to generate an indirect jump, or - more likely these days due to mitigations - just a series of conditional branches. Yes, the conditional branches also have branch prediction, but the branch prediction is much more controlled, in that it just causes speculatively running the wrong system call (harmless), rather than speculatively running possibly wrong random less controlled code gadgets. This doesn't mitigate other indirect calls, but the system call indirection is the first and most easily triggered case. Signed-off-by: Linus Torvalds Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 6 +++--- arch/x86/entry/syscall_32.c | 21 +++++++++++++++++++-- arch/x86/entry/syscall_64.c | 19 +++++++++++++++++-- arch/x86/entry/syscall_x32.c | 10 +++++++--- arch/x86/include/asm/syscall.h | 10 ++++------ 5 files changed, 50 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 9c0b26ae51069..a60d19228890c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -114,7 +114,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f8..c2235bae17ef6 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9f..33b3f09e6f151 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265d..03de4a9321318 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 5b85987a5e97c..8f0da479c77d1 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include /* for TS_COMPAT */ #include +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. -- GitLab From 07dbb10f153f483e8249acebdffedf922e2ec2e1 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:56:58 -0700 Subject: [PATCH 1588/2290] x86/bhi: Add support for clearing branch history at syscall entry commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 upstream. Branch History Injection (BHI) attacks may allow a malicious application to influence indirect branch prediction in kernel by poisoning the branch history. eIBRS isolates indirect branch targets in ring0. The BHB can still influence the choice of indirect branch predictor entry, and although branch predictor entries are isolated between modes when eIBRS is enabled, the BHB itself is not isolated between modes. Alder Lake and new processors supports a hardware control BHI_DIS_S to mitigate BHI. For older processors Intel has released a software sequence to clear the branch history on parts that don't support BHI_DIS_S. Add support to execute the software sequence at syscall entry and VMexit to overwrite the branch history. For now, branch history is not cleared at interrupt entry, as malicious applications are not believed to have sufficient control over the registers, since previous register state is cleared at interrupt entry. Researchers continue to poke at this area and it may become necessary to clear at interrupt entry as well in the future. This mitigation is only defined here. It is enabled later. Signed-off-by: Pawan Gupta Co-developed-by: Daniel Sneddon Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 4 +- arch/x86/entry/entry_64.S | 61 ++++++++++++++++++++++++++++ arch/x86/entry/entry_64_compat.S | 16 ++++++++ arch/x86/include/asm/cpufeatures.h | 3 +- arch/x86/include/asm/nospec-branch.h | 12 ++++++ arch/x86/include/asm/syscall.h | 1 + arch/x86/kvm/vmx/vmenter.S | 2 + 7 files changed, 96 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a60d19228890c..e72dac092245a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c2383c2880ec6..6624806e6904b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1539,3 +1540,63 @@ SYM_CODE_START(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4bcd009a232bf..b14b8cd85eb23 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -210,6 +211,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -278,3 +280,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1280daa729757..d1e1796538211 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -434,11 +434,12 @@ /* * Extended auxiliary flags: Linux defined - for features scattered in various - * CPUID levels like 0x80000022, etc. + * CPUID levels like 0x80000022, etc and Linux defined features. * * Reuse free bits when adding new feature flags! */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 47e4e06a47d76..33e365cbe15bf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -215,6 +215,14 @@ .Lskip_verw_\@: .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm +#else +#define CLEAR_BRANCH_HISTORY +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -243,6 +251,10 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8f0da479c77d1..2725a4502321b 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -127,6 +127,7 @@ static inline int syscall_get_arch(struct task_struct *task) void do_syscall_64(struct pt_regs *regs, int nr); void do_int80_syscall_32(struct pt_regs *regs); long do_fast_syscall_32(struct pt_regs *regs); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b2cad66dee12..de4490e331fa2 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -242,6 +242,8 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX -- GitLab From 29c50bb6fbe4598d313ddb7ddb183e8b3d7bdf80 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 13 Mar 2024 09:47:57 -0700 Subject: [PATCH 1589/2290] x86/bhi: Define SPEC_CTRL_BHI_DIS_S commit 0f4a837615ff925ba62648d280a861adf1582df7 upstream. Newer processors supports a hardware control BHI_DIS_S to mitigate Branch History Injection (BHI). Setting BHI_DIS_S protects the kernel from userspace BHI attacks without having to manually overwrite the branch history. Define MSR_SPEC_CTRL bit BHI_DIS_S and its enumeration CPUID.BHI_CTRL. Mitigation is enabled later. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 5 ++++- arch/x86/kernel/cpu/scattered.c | 1 + arch/x86/kvm/reverse_cpuid.h | 3 ++- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d1e1796538211..20ba328d25276 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -440,6 +440,7 @@ */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 33a19ef23644d..9b3d17d90bdec 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -55,10 +55,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 94e0a42528dcb..28c357cf7c75e 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index d2f6703a2633e..e43909d6504af 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -42,7 +42,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) struct cpuid_reg { @@ -106,6 +106,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(SGX1); KVM_X86_TRANSLATE_FEATURE(SGX2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } -- GitLab From 42196bdec0824900b02bc21e02e9bb139197ca14 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:03 -0700 Subject: [PATCH 1590/2290] x86/bhi: Enumerate Branch History Injection (BHI) bug commit be482ff9500999f56093738f9219bbabc729d163 upstream. Mitigation for BHI is selected based on the bug enumeration. Add bits needed to enumerate BHI bug. Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kernel/cpu/common.c | 24 ++++++++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 20ba328d25276..0f1e0c73c4748 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -489,4 +489,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9b3d17d90bdec..681e8401b8a35 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -160,6 +160,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ca243d7ba0ea5..08fe77d2a3f90 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1144,6 +1144,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1206,18 +1207,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1454,6 +1455,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(ia32_cap)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(ia32_cap & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; -- GitLab From bb8384b6dfbc49be230071d1e844a8741982b1ec Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:05 -0700 Subject: [PATCH 1591/2290] x86/bhi: Add BHI mitigation knob commit ec9404e40e8f36421a2b66ecb76dc2209fe7f3ef upstream. Branch history clearing software sequences and hardware control BHI_DIS_S were defined to mitigate Branch History Injection (BHI). Add cmdline spectre_bhi={on|off|auto} to control BHI mitigation: auto - Deploy the hardware mitigation BHI_DIS_S, if available. on - Deploy the hardware mitigation BHI_DIS_S, if available, otherwise deploy the software sequence at syscall entry and VMexit. off - Turn off BHI mitigation. The default is auto mode which does not deploy the software sequence mitigation. This is because of the hardening done in the syscall dispatch path, which is the likely target of BHI. Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 45 ++++++++-- .../admin-guide/kernel-parameters.txt | 11 +++ arch/x86/Kconfig | 25 ++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 90 ++++++++++++++++++- 5 files changed, 165 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 32a8893e56177..62c7902c66fd6 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, the BHB might be shared across privilege levels even in the presence of Enhanced IBRS. -Currently the only known real-world BHB attack vector is via -unprivileged eBPF. Therefore, it's highly recommended to not enable -unprivileged eBPF, especially when eIBRS is used (without retpolines). -For a full mitigation against BHB attacks, it's recommended to use -retpolines (or eIBRS combined with retpolines). +Previously the only known real-world BHB attack vector was via unprivileged +eBPF. Further research has found attacks that don't require unprivileged eBPF. +For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or +use the BHB clearing sequence. Attack scenarios ---------------- @@ -430,6 +429,21 @@ The possible values in this file are: 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB =========================== ======================================================= + - Branch History Injection (BHI) protection status: + +.. list-table:: + + * - BHI: Not affected + - System is not affected + * - BHI: Retpoline + - System is protected by retpoline + * - BHI: BHI_DIS_S + - System is protected by BHI_DIS_S + * - BHI: SW loop + - System is protected by software clearing sequence + * - BHI: Syscall hardening + - Syscalls are hardened against BHI + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -484,7 +498,11 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks. + some Spectre v2 variant attacks. The BHB can still influence the choice of + indirect branch predictor entry, and although branch predictor entries are + isolated between modes when eIBRS is enabled, the BHB itself is not isolated + between modes. Systems which support BHI_DIS_S will set it to protect against + BHI attacks. On Intel's enhanced IBRS systems, this includes cross-thread branch target injections on SMT systems (STIBP). In other words, Intel eIBRS enables @@ -638,6 +656,21 @@ kernel command line. spectre_v2=off. Spectre variant 1 mitigations cannot be disabled. + spectre_bhi= + + [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. Syscalls are hardened against BHI + regardless of this setting. This setting affects the deployment + of the HW BHI control and the SW BHB clearing sequence. + + on + unconditionally enable. + off + unconditionally disable. + auto + enable if hardware mitigation + control(BHI_DIS_S) is available. + For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt Mitigation selection guide diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 88dffaf8f0a99..13866591af4d2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5733,6 +5733,17 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/admin-guide/laptops/sonypi.rst + spectre_bhi= [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. Syscalls are hardened against BHI + reglardless of this setting. This setting affects the + deployment of the HW BHI control and the SW BHB + clearing sequence. + + on - unconditionally enable. + off - unconditionally disable. + auto - (default) enable only if hardware mitigation + control(BHI_DIS_S) is available. + spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. The default operation protects the kernel from diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bea53385d31e3..35ba10c28ca31 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2563,6 +2563,31 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also +choice + prompt "Clear branch history" + depends on CPU_SUP_INTEL + default SPECTRE_BHI_AUTO + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See + +config SPECTRE_BHI_ON + bool "on" + help + Equivalent to setting spectre_bhi=on command line parameter. +config SPECTRE_BHI_OFF + bool "off" + help + Equivalent to setting spectre_bhi=off command line parameter. +config SPECTRE_BHI_AUTO + bool "auto" + help + Equivalent to setting spectre_bhi=auto command line parameter. + +endchoice + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0f1e0c73c4748..a00b541644c79 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -441,6 +441,7 @@ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4a1b4dee1425f..cfd73f5921fe3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1584,6 +1584,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, + BHI_MITIGATION_AUTO, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : + IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : + BHI_MITIGATION_AUTO; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else if (!strcmp(str, "auto")) + bhi_mitigation = BHI_MITIGATION_AUTO; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return; + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (bhi_mitigation == BHI_MITIGATION_AUTO) + return; + + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1694,6 +1762,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -2712,6 +2783,21 @@ static char *pbrsb_eibrs_state(void) } } +static const char * const spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return "; BHI: Retpoline"; + + return "; BHI: Vulnerable (Syscall hardening enabled)"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2724,13 +2810,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } -- GitLab From 43704e993ae54b8caf821501229dd2534ecb0e56 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:09 -0700 Subject: [PATCH 1592/2290] x86/bhi: Mitigate KVM by default commit 95a6ccbdc7199a14b71ad8901cb788ba7fb5167b upstream. BHI mitigation mode spectre_bhi=auto does not deploy the software mitigation by default. In a cloud environment, it is a likely scenario where userspace is trusted but the guests are not trusted. Deploying system wide mitigation in such cases is not desirable. Update the auto mode to unconditionally mitigate against malicious guests. Deploy the software sequence at VMexit in auto mode also, when hardware mitigation is not available. Unlike the force =on mode, software sequence is not deployed at syscalls in auto mode. Suggested-by: Alexandre Chartre Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 7 +++++-- Documentation/admin-guide/kernel-parameters.txt | 5 +++-- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 5 +++++ arch/x86/kernel/cpu/bugs.c | 9 ++++++++- arch/x86/kvm/vmx/vmenter.S | 2 +- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 62c7902c66fd6..9edb2860a3e19 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -439,10 +439,12 @@ The possible values in this file are: - System is protected by retpoline * - BHI: BHI_DIS_S - System is protected by BHI_DIS_S - * - BHI: SW loop + * - BHI: SW loop; KVM SW loop - System is protected by software clearing sequence * - BHI: Syscall hardening - Syscalls are hardened against BHI + * - BHI: Syscall hardening; KVM: SW loop + - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will @@ -669,7 +671,8 @@ kernel command line. unconditionally disable. auto enable if hardware mitigation - control(BHI_DIS_S) is available. + control(BHI_DIS_S) is available, otherwise + enable alternate mitigation in KVM. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 13866591af4d2..b2c7b2f012e90 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5741,8 +5741,9 @@ on - unconditionally enable. off - unconditionally disable. - auto - (default) enable only if hardware mitigation - control(BHI_DIS_S) is available. + auto - (default) enable hardware mitigation + (BHI_DIS_S) if available, otherwise enable + alternate mitigation in KVM. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a00b541644c79..7ded926724147 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -442,6 +442,7 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 33e365cbe15bf..1e481d308e188 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,8 +219,13 @@ .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP .endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm #else #define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT #endif #else /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cfd73f5921fe3..96bd3ee83a484 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1645,9 +1645,14 @@ static void __init bhi_select_mitigation(void) if (!IS_ENABLED(CONFIG_X86_64)) return; + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + if (bhi_mitigation == BHI_MITIGATION_AUTO) return; + /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); } @@ -2790,10 +2795,12 @@ static const char * const spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) - return "; BHI: SW loop"; + return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Syscall hardening, KVM: SW loop"; return "; BHI: Vulnerable (Syscall hardening enabled)"; } diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index de4490e331fa2..b4f8937226c21 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -242,7 +242,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host - CLEAR_BRANCH_HISTORY + CLEAR_BRANCH_HISTORY_VMEXIT /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX -- GitLab From 3e4283b77107d1105a378859eb196e3ba5661270 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 13 Mar 2024 09:49:17 -0700 Subject: [PATCH 1593/2290] KVM: x86: Add BHI_NO Intel processors that aren't vulnerable to BHI will set commit ed2e8d49b54d677f3123668a21a57822d679651f upstream. MSR_IA32_ARCH_CAPABILITIES[BHI_NO] = 1;. Guests may use this BHI_NO bit to determine if they need to implement BHI mitigations or not. Allow this bit to be passed to the guests. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0e6e63a8f0949..f724765032bc4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1614,7 +1614,7 @@ static unsigned int num_msr_based_features; ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { -- GitLab From e21838dfd0844b093a92d4cdd4db836b473c912d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 9 Apr 2024 19:32:41 +0200 Subject: [PATCH 1594/2290] x86: set SPECTRE_BHI_ON as default commit 2bb69f5fc72183e1c62547d900f560d0e9334925 upstream. Part of a merge commit from Linus that adjusted the default setting of SPECTRE_BHI_ON. Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 35ba10c28ca31..ba815ac474a1b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2566,7 +2566,7 @@ config MITIGATION_RFDS choice prompt "Clear branch history" depends on CPU_SUP_INTEL - default SPECTRE_BHI_AUTO + default SPECTRE_BHI_ON help Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks where the branch history buffer is poisoned to speculatively steer -- GitLab From bf1e3b1cb1e002ed1590c91f1a24433b59322368 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Apr 2024 16:28:36 +0200 Subject: [PATCH 1595/2290] Linux 6.1.85 Link: https://lore.kernel.org/r/20240408125256.218368873@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Kelsey Steele Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Conor Dooley Tested-by: Mark Brown Tested-by: Sven Joachim Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20240409172805.638917723@linuxfoundation.org Tested-by: kernelci.org bot Link: https://lore.kernel.org/r/20240409173524.517362803@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e33150db2bfc..5dff9ff999981 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 84 +SUBLEVEL = 85 EXTRAVERSION = NAME = Curry Ramen -- GitLab From e6768c6737f4c02cba193a3339f0cc2907f0b86a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 12 Apr 2024 06:11:25 +1000 Subject: [PATCH 1596/2290] amdkfd: use calloc instead of kzalloc to avoid integer overflow commit 3b0daecfeac0103aba8b293df07a0cbaf8b43f29 upstream. This uses calloc instead of doing the multiplication which might overflow. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e191d38f3da62..3f403afd6de83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -765,8 +765,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, * nodes, but not more than args->num_of_nodes as that is * the amount of memory allocated by user */ - pa = kzalloc((sizeof(struct kfd_process_device_apertures) * - args->num_of_nodes), GFP_KERNEL); + pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures), + GFP_KERNEL); if (!pa) return -ENOMEM; -- GitLab From 7dd4831c3ecb5e8f7f26fe69edd56ed791e06243 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Sun, 17 Dec 2023 13:29:03 +0200 Subject: [PATCH 1597/2290] wifi: ath9k: fix LNA selection in ath_ant_try_scan() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d6b27eb997ef9a2aa51633b3111bc4a04748e6d3 ] In 'ath_ant_try_scan()', (most likely) the 2nd LNA's signal strength should be used in comparison against RSSI when selecting first LNA as the main one. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://msgid.link/20231211172502.25202-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/antenna.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/antenna.c b/drivers/net/wireless/ath/ath9k/antenna.c index 988222cea9dfe..acc84e6711b0e 100644 --- a/drivers/net/wireless/ath/ath9k/antenna.c +++ b/drivers/net/wireless/ath/ath9k/antenna.c @@ -643,7 +643,7 @@ static void ath_ant_try_scan(struct ath_ant_comb *antcomb, conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1; conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_PLUS_LNA2; } else if (antcomb->rssi_sub > - antcomb->rssi_lna1) { + antcomb->rssi_lna2) { /* set to A-B */ conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1; conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_MINUS_LNA2; -- GitLab From d6b0472d5fbda3b1b28d325a21aee1f435063270 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 25 Jan 2024 20:10:48 -0800 Subject: [PATCH 1598/2290] bnx2x: Fix firmware version string character counts [ Upstream commit 5642c82b9463c3263c086efb002516244bd4c668 ] A potential string truncation was reported in bnx2x_fill_fw_str(), when a long bp->fw_ver and a long phy_fw_ver might coexist, but seems unlikely with real-world hardware. Use scnprintf() to indicate the intent that truncations are tolerated. While reading this code, I found a collection of various buffer size counting issues. None looked like they might lead to a buffer overflow with current code (the small buffers are 20 bytes and might only ever consume 10 bytes twice with a trailing %NUL). However, early truncation (due to a %NUL in the middle of the string) might be happening under likely rare conditions. Regardless fix the formatters and related functions: - Switch from a separate strscpy() to just adding an additional "%s" to the format string that immediately follows it in bnx2x_fill_fw_str(). - Use sizeof() universally instead of using unbound defines. - Fix bnx2x_7101_format_ver() and bnx2x_null_format_ver() to report the number of characters written, not including the trailing %NUL (as already done with the other firmware formatting functions). - Require space for at least 1 byte in bnx2x_get_ext_phy_fw_version() for the trailing %NUL. - Correct the needed buffer size in bnx2x_3_seq_format_ver(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401260858.jZN6vD1k-lkp@intel.com/ Cc: Ariel Elior Cc: Sudarsana Kalluru Cc: Manish Chopra Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240126041044.work.220-kees@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 9 +++++---- .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 14 +++++++------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4950fde82d175..b04c5b51eb598 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -147,10 +147,11 @@ void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len) phy_fw_ver[0] = '\0'; bnx2x_get_ext_phy_fw_version(&bp->link_params, - phy_fw_ver, PHY_FW_VER_LEN); - strscpy(buf, bp->fw_ver, buf_len); - snprintf(buf + strlen(bp->fw_ver), 32 - strlen(bp->fw_ver), - "bc %d.%d.%d%s%s", + phy_fw_ver, sizeof(phy_fw_ver)); + /* This may become truncated. */ + scnprintf(buf, buf_len, + "%sbc %d.%d.%d%s%s", + bp->fw_ver, (bp->common.bc_ver & 0xff0000) >> 16, (bp->common.bc_ver & 0xff00) >> 8, (bp->common.bc_ver & 0xff), diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index bda3ccc28eca6..f920976c36f0c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1132,7 +1132,7 @@ static void bnx2x_get_drvinfo(struct net_device *dev, } memset(version, 0, sizeof(version)); - bnx2x_fill_fw_str(bp, version, ETHTOOL_FWVERS_LEN); + bnx2x_fill_fw_str(bp, version, sizeof(version)); strlcat(info->fw_version, version, sizeof(info->fw_version)); strscpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info)); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 02808513ffe45..ea310057fe3af 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6163,8 +6163,8 @@ static void bnx2x_link_int_ack(struct link_params *params, static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len) { - str[0] = '\0'; - (*len)--; + if (*len) + str[0] = '\0'; return 0; } @@ -6173,7 +6173,7 @@ static int bnx2x_format_ver(u32 num, u8 *str, u16 *len) u16 ret; if (*len < 10) { - /* Need more than 10chars for this format */ + /* Need more than 10 chars for this format */ bnx2x_null_format_ver(num, str, len); return -EINVAL; } @@ -6188,8 +6188,8 @@ static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len) { u16 ret; - if (*len < 10) { - /* Need more than 10chars for this format */ + if (*len < 9) { + /* Need more than 9 chars for this format */ bnx2x_null_format_ver(num, str, len); return -EINVAL; } @@ -6208,7 +6208,7 @@ int bnx2x_get_ext_phy_fw_version(struct link_params *params, u8 *version, int status = 0; u8 *ver_p = version; u16 remain_len = len; - if (version == NULL || params == NULL) + if (version == NULL || params == NULL || len == 0) return -EINVAL; bp = params->bp; @@ -11546,7 +11546,7 @@ static int bnx2x_7101_format_ver(u32 spirom_ver, u8 *str, u16 *len) str[2] = (spirom_ver & 0xFF0000) >> 16; str[3] = (spirom_ver & 0xFF000000) >> 24; str[4] = '\0'; - *len -= 5; + *len -= 4; return 0; } -- GitLab From fd5fdacae93a00a0c80c9a190ee623e26cd8a0a3 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Sun, 21 Jan 2024 15:18:26 +0800 Subject: [PATCH 1599/2290] wifi: rtw89: pci: enlarge RX DMA buffer to consider size of RX descriptor [ Upstream commit c108b4a50dd7650941d4f4ec5c161655a73711db ] Hardware puts RX descriptor and packet in RX DMA buffer, so it could be over one buffer size if packet size is 11454, and then it will be split into two segments. WiFi 7 chips use larger size of RX descriptor, so enlarge DMA buffer size according to RX descriptor to have better performance and simple flow. Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/20240121071826.10159-5-pkshih@realtek.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw89/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index 179740607778a..d982c5dc0889f 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -546,7 +546,7 @@ #define RTW89_PCI_TXWD_NUM_MAX 512 #define RTW89_PCI_TXWD_PAGE_SIZE 128 #define RTW89_PCI_ADDRINFO_MAX 4 -#define RTW89_PCI_RX_BUF_SIZE 11460 +#define RTW89_PCI_RX_BUF_SIZE (11454 + 40) /* +40 for rtw89_rxdesc_long_v2 */ #define RTW89_PCI_POLL_BDRAM_RST_CNT 100 #define RTW89_PCI_MULTITAG 8 -- GitLab From feacd430b42bbfa9ab3ed9e4f38b86c43e348c75 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 5 Jan 2024 08:40:00 -0800 Subject: [PATCH 1600/2290] VMCI: Fix memcpy() run-time warning in dg_dispatch_as_host() [ Upstream commit 19b070fefd0d024af3daa7329cbc0d00de5302ec ] Syzkaller hit 'WARNING in dg_dispatch_as_host' bug. memcpy: detected field-spanning write (size 56) of single field "&dg_info->msg" at drivers/misc/vmw_vmci/vmci_datagram.c:237 (size 24) WARNING: CPU: 0 PID: 1555 at drivers/misc/vmw_vmci/vmci_datagram.c:237 dg_dispatch_as_host+0x88e/0xa60 drivers/misc/vmw_vmci/vmci_datagram.c:237 Some code commentry, based on my understanding: 544 #define VMCI_DG_SIZE(_dg) (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size) /// This is 24 + payload_size memcpy(&dg_info->msg, dg, dg_size); Destination = dg_info->msg ---> this is a 24 byte structure(struct vmci_datagram) Source = dg --> this is a 24 byte structure (struct vmci_datagram) Size = dg_size = 24 + payload_size {payload_size = 56-24 =32} -- Syzkaller managed to set payload_size to 32. 35 struct delayed_datagram_info { 36 struct datagram_entry *entry; 37 struct work_struct work; 38 bool in_dg_host_queue; 39 /* msg and msg_payload must be together. */ 40 struct vmci_datagram msg; 41 u8 msg_payload[]; 42 }; So those extra bytes of payload are copied into msg_payload[], a run time warning is seen while fuzzing with Syzkaller. One possible way to fix the warning is to split the memcpy() into two parts -- one -- direct assignment of msg and second taking care of payload. Gustavo quoted: "Under FORTIFY_SOURCE we should not copy data across multiple members in a structure." Reported-by: syzkaller Suggested-by: Vegard Nossum Suggested-by: Gustavo A. R. Silva Signed-off-by: Harshit Mogalapalli Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20240105164001.2129796-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/misc/vmw_vmci/vmci_datagram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index f50d22882476f..d1d8224c8800c 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -234,7 +234,8 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) dg_info->in_dg_host_queue = true; dg_info->entry = dst_entry; - memcpy(&dg_info->msg, dg, dg_size); + dg_info->msg = *dg; + memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); -- GitLab From 8d80e092027a82dd58a30fb46a98c075135ba1fd Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Mon, 29 Jan 2024 21:22:00 +0200 Subject: [PATCH 1601/2290] wifi: iwlwifi: pcie: Add the PCI device id for new hardware [ Upstream commit 6770eee75148ba10c0c051885379714773e00b48 ] Add the support for a new PCI device id. Signed-off-by: Mukesh Sisodiya Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240129211905.fde32107e0a3.I597cff4f340e4bed12b7568a0ad504bd4b2c1cf8@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 4d4db5f6836be..7f30e6add9933 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -505,6 +505,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* Bz devices */ {IWL_PCI_DEVICE(0x2727, PCI_ANY_ID, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0x272D, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x272b, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0xA840, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x7740, PCI_ANY_ID, iwl_bz_trans_cfg)}, -- GitLab From 94b016b28bdd417cf19f57da818bed28e7f3aaf1 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 7 Feb 2024 14:47:02 +0106 Subject: [PATCH 1602/2290] panic: Flush kernel log buffer at the end [ Upstream commit d988d9a9b9d180bfd5c1d353b3b176cb90d6861b ] If the kernel crashes in a context where printk() calls always defer printing (such as in NMI or inside a printk_safe section) then the final panic messages will be deferred to irq_work. But if irq_work is not available, the messages will not get printed unless explicitly flushed. The result is that the final "end Kernel panic" banner does not get printed. Add one final flush after the last printk() call to make sure the final panic messages make it out as well. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240207134103.1357162-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/panic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 63e94f3bd8dcd..e6c2bf04a32c0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -441,6 +441,14 @@ void panic(const char *fmt, ...) /* Do not scroll important messages printed above */ suppress_printk = 1; + + /* + * The final messages may not have been printed if in a context that + * defers printing (such as NMI) and irq_work is not available. + * Explicitly flush the kernel log buffer one last time. + */ + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + local_irq_enable(); for (i = 0; ; i += PANIC_TIMER_STEP) { touch_softlockup_watchdog(); -- GitLab From b510fbe3a7bf26a6b8fcaa23d0275bb383f95dfd Mon Sep 17 00:00:00 2001 From: C Cheng Date: Tue, 19 Dec 2023 11:14:42 +0800 Subject: [PATCH 1603/2290] cpuidle: Avoid potential overflow in integer multiplication [ Upstream commit 88390dd788db485912ee7f9a8d3d56fc5265d52f ] In detail: In C language, when you perform a multiplication operation, if both operands are of int type, the multiplication operation is performed on the int type, and then the result is converted to the target type. This means that if the product of int type multiplication exceeds the range that int type can represent, an overflow will occur even if you store the result in a variable of int64_t type. For a multiplication of two int values, it is better to use mul_u32_u32() rather than s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC to avoid potential overflow happenning. Signed-off-by: C Cheng Signed-off-by: Bo Ye Reviewed-by: AngeloGioacchino Del Regno [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpuidle/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index f70aa17e2a8e0..c594e28adddf3 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -185,7 +186,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) s->target_residency_ns = 0; if (s->exit_latency > 0) - s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; + s->exit_latency_ns = mul_u32_u32(s->exit_latency, NSEC_PER_USEC); else if (s->exit_latency_ns < 0) s->exit_latency_ns = 0; } -- GitLab From a2920489347653d586d8710ed1d315ed5c2bef75 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 31 Jan 2024 22:17:08 +0100 Subject: [PATCH 1604/2290] arm64: dts: rockchip: fix rk3328 hdmi ports node [ Upstream commit 1d00ba4700d1e0f88ae70d028d2e17e39078fa1c ] Fix rk3328 hdmi ports node so that it matches the rockchip,dw-hdmi.yaml binding. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/e5dea3b7-bf84-4474-9530-cc2da3c41104@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 905a50aa5dc38..d42846efff2fe 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -741,11 +741,20 @@ status = "disabled"; ports { - hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + + hdmi_in: port@0 { + reg = <0>; + hdmi_in_vop: endpoint { remote-endpoint = <&vop_out_hdmi>; }; }; + + hdmi_out: port@1 { + reg = <1>; + }; }; }; -- GitLab From 6925d11fbda285c1b9613c43b30c3614a5552fc0 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 31 Jan 2024 22:17:31 +0100 Subject: [PATCH 1605/2290] arm64: dts: rockchip: fix rk3399 hdmi ports node [ Upstream commit f051b6ace7ffcc48d6d1017191f167c0a85799f6 ] Fix rk3399 hdmi ports node so that it matches the rockchip,dw-hdmi.yaml binding. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/a6ab6f75-3b80-40b1-bd30-3113e14becdd@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index a7e6eccb14cc6..8363cc13ec517 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1906,6 +1906,7 @@ hdmi: hdmi@ff940000 { compatible = "rockchip,rk3399-dw-hdmi"; reg = <0x0 0xff940000 0x0 0x20000>; + reg-io-width = <4>; interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, @@ -1914,13 +1915,16 @@ <&cru PLL_VPLL>; clock-names = "iahb", "isfr", "cec", "grf", "ref"; power-domains = <&power RK3399_PD_HDCP>; - reg-io-width = <4>; rockchip,grf = <&grf>; #sound-dai-cells = <0>; status = "disabled"; ports { - hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + + hdmi_in: port@0 { + reg = <0>; #address-cells = <1>; #size-cells = <0>; @@ -1933,6 +1937,10 @@ remote-endpoint = <&vopl_out_hdmi>; }; }; + + hdmi_out: port@1 { + reg = <1>; + }; }; }; -- GitLab From b8ca15861c88cbc85e2fbc418dc829803b76a859 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 14 Feb 2024 09:59:01 -0800 Subject: [PATCH 1606/2290] ionic: set adminq irq affinity [ Upstream commit c699f35d658f3c21b69ed24e64b2ea26381e941d ] We claim to have the AdminQ on our irq0 and thus cpu id 0, but we need to be sure we set the affinity hint to try to keep it there. Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Jacob Keller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index fcc3faecb0600..d33cf8ee7c336 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3216,9 +3216,12 @@ static int ionic_lif_adminq_init(struct ionic_lif *lif) napi_enable(&qcq->napi); - if (qcq->flags & IONIC_QCQ_F_INTR) + if (qcq->flags & IONIC_QCQ_F_INTR) { + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_CLEAR); + } qcq->flags |= IONIC_QCQ_F_INITED; -- GitLab From 8af60bb2b215f478b886f1d6d302fefa7f0b917d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Feb 2024 12:36:57 +0100 Subject: [PATCH 1607/2290] net: skbuff: add overflow debug check to pull/push helpers [ Upstream commit 219eee9c0d16f1b754a8b85275854ab17df0850a ] syzbot managed to trigger following splat: BUG: KASAN: use-after-free in __skb_flow_dissect+0x4a3b/0x5e50 Read of size 1 at addr ffff888208a4000e by task a.out/2313 [..] __skb_flow_dissect+0x4a3b/0x5e50 __skb_get_hash+0xb4/0x400 ip_tunnel_xmit+0x77e/0x26f0 ipip_tunnel_xmit+0x298/0x410 .. Analysis shows that the skb has a valid ->head, but bogus ->data pointer. skb->data gets its bogus value via the neigh layer, which does: 1556 __skb_pull(skb, skb_network_offset(skb)); ... and the skb was already dodgy at this point: skb_network_offset(skb) returns a negative value due to an earlier overflow of skb->network_header (u16). __skb_pull thus "adjusts" skb->data by a huge offset, pointing outside skb->head area. Allow debug builds to splat when we try to pull/push more than INT_MAX bytes. After this, the syzkaller reproducer yields a more precise splat before the flow dissector attempts to read off skb->data memory: WARNING: CPU: 5 PID: 2313 at include/linux/skbuff.h:2653 neigh_connected_output+0x28e/0x400 ip_finish_output2+0xb25/0xed0 iptunnel_xmit+0x4ff/0x870 ipgre_xmit+0x78e/0xbb0 Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240216113700.23013-1-fw@strlen.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c4a8520dc748f..1326a935b6fad 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2603,6 +2603,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val) void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->data -= len; skb->len += len; return skb->data; @@ -2611,6 +2613,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) @@ -2634,6 +2638,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + if (likely(len <= skb_headlen(skb))) return true; if (unlikely(len > skb->len)) -- GitLab From 161d6b803778c5a825d50bed14cfafe9b29e52ad Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 25 Dec 2023 20:03:56 +0100 Subject: [PATCH 1608/2290] firmware: tegra: bpmp: Return directly after a failed kzalloc() in get_filename() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1315848f1f8a0100cb6f8a7187bc320c5d98947f ] The kfree() function was called in one case by the get_filename() function during error handling even if the passed variable contained a null pointer. This issue was detected by using the Coccinelle software. Thus return directly after a call of the function “kzalloc” failed at the beginning. Signed-off-by: Markus Elfring Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/firmware/tegra/bpmp-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index 9d3874cdaaeef..34e4152477f3b 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -81,7 +81,7 @@ static const char *get_filename(struct tegra_bpmp *bpmp, root_path_buf = kzalloc(root_path_buf_len, GFP_KERNEL); if (!root_path_buf) - goto out; + return NULL; root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf, root_path_buf_len); -- GitLab From 8ead0a04a7f3dc1b475759599b31f450fa64ad3d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 22:36:49 +0100 Subject: [PATCH 1609/2290] wifi: brcmfmac: Add DMI nvram filename quirk for ACEPC W5 Pro [ Upstream commit 32167707aa5e7ae4b160c18be79d85a7b4fdfcfb ] The ACEPC W5 Pro HDMI stick contains quite generic names in the sys_vendor and product_name DMI strings, without this patch brcmfmac will try to load: "brcmfmac43455-sdio.$(DEFAULT_STRING)-$(DEFAULT_STRING).txt" as nvram file which is both too generic and messy with the $ symbols in the name. The ACEPC W5 Pro uses the same Ampak AP6255 module as the ACEPC T8 and the nvram for the T8 is already in linux-firmware, so point the new DMI nvram filename quirk to the T8 nvram file. Signed-off-by: Hans de Goede Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240216213649.251718-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 86ff174936a9a..c3a602197662b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -82,6 +82,15 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&acepc_t8_data, }, + { + /* ACEPC W5 Pro Cherry Trail Z8350 HDMI stick, same wifi as the T8 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"), + DMI_MATCH(DMI_CHASSIS_TYPE, "3"), + DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + .driver_data = (void *)&acepc_t8_data, + }, { /* Chuwi Hi8 Pro with D2D3_Hi8Pro.233 BIOS */ .matches = { -- GitLab From 635594cca59f9d7a8e96187600c34facb8bc0682 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 18:02:06 +0800 Subject: [PATCH 1610/2290] pstore/zone: Add a null pointer check to the psz_kmsg_read [ Upstream commit 98bc7e26e14fbb26a6abf97603d59532475e97f8 ] kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118100206.213928-1-chentao@kylinos.cn Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- fs/pstore/zone.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 2770746bb7aa1..abca117725c81 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -973,6 +973,8 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n", kmsg_dump_reason_str(record->reason), record->count); + if (!buf) + return -ENOMEM; hlen = strlen(buf); record->buf = krealloc(buf, hlen + size, GFP_KERNEL); if (!record->buf) { -- GitLab From 51a9b20a047dd9d8f5dda99d55d5c3e15f92e2a4 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Tue, 13 Feb 2024 16:19:56 -0800 Subject: [PATCH 1611/2290] tools/power x86_energy_perf_policy: Fix file leak in get_pkg_num() [ Upstream commit f85450f134f0b4ca7e042dc3dc89155656a2299d ] In function get_pkg_num() if fopen_or_die() succeeds it returns a file pointer to be used. But fclose() is never called before returning from the function. Signed-off-by: Samasth Norway Ananda Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 5fd9e594079cf..ebda9c366b2ba 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -1241,6 +1241,7 @@ unsigned int get_pkg_num(int cpu) retval = fscanf(fp, "%d\n", &pkg); if (retval != 1) errx(1, "%s: failed to parse", pathname); + fclose(fp); return pkg; } -- GitLab From a3f6045ce3c947093305c91889415cdca412c33c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 22 Feb 2024 20:58:22 +0300 Subject: [PATCH 1612/2290] net: pcs: xpcs: Return EINVAL in the internal methods [ Upstream commit f5151005d379d9ce42e327fd3b2d2aaef61cda81 ] In particular the xpcs_soft_reset() and xpcs_do_config() functions currently return -1 if invalid auto-negotiation mode is specified. That value might be then passed to the generic kernel subsystems which require a standard kernel errno value. Even though the erroneous conditions are very specific (memory corruption or buggy driver implementation) using a hard-coded -1 literal doesn't seem correct anyway especially when it comes to passing it higher to the network subsystem or printing to the system log. Convert the hard-coded error values to -EINVAL then. Signed-off-by: Serge Semin Tested-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/pcs/pcs-xpcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 3f882bce37f42..d126273daab4f 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -262,7 +262,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs, dev = MDIO_MMD_VEND2; break; default: - return -1; + return -EINVAL; } ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET); @@ -904,7 +904,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, return ret; break; default: - return -1; + return -EINVAL; } if (compat->pma_config) { -- GitLab From 4e0cfb25d49da2e6261ad582f58ffa5b5dd8c8e9 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Wed, 21 Feb 2024 16:17:21 -0800 Subject: [PATCH 1613/2290] dma-direct: Leak pages on dma_set_decrypted() failure [ Upstream commit b9fa16949d18e06bdf728a560f5c8af56d2bdcaf ] On TDX it is possible for the untrusted host to cause set_memory_encrypted() or set_memory_decrypted() to fail such that an error is returned and the resulting memory is shared. Callers need to take care to handle these errors to avoid returning decrypted (shared) memory to the page allocator, which could lead to functional or security issues. DMA could free decrypted/shared pages if dma_set_decrypted() fails. This should be a rare case. Just leak the pages in this case instead of freeing them. Signed-off-by: Rick Edgecombe Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/direct.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 63859a101ed83..d4215739efc71 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -296,7 +296,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, } else { ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + goto out_leak_pages; } memset(ret, 0, size); @@ -317,6 +317,8 @@ out_encrypt_pages: out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; +out_leak_pages: + return NULL; } void dma_direct_free(struct device *dev, size_t size, @@ -379,12 +381,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + goto out_leak_pages; memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; -out_free_pages: - __dma_direct_free_pages(dev, page, size); +out_leak_pages: return NULL; } -- GitLab From 6597a6687af54e2cb58371cf8f6ee4dd85c537de Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 23 Feb 2024 13:31:11 +0800 Subject: [PATCH 1614/2290] wifi: ath11k: decrease MHI channel buffer length to 8KB [ Upstream commit 1cca1bddf9ef080503c15378cecf4877f7510015 ] Currently buf_len field of ath11k_mhi_config_qca6390 is assigned with 0, making MHI use a default size, 64KB, to allocate channel buffers. This is likely to fail in some scenarios where system memory is highly fragmented and memory compaction or reclaim is not allowed. There is a fail report which is caused by it: kworker/u32:45: page allocation failure: order:4, mode:0x40c00(GFP_NOIO|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0 CPU: 0 PID: 19318 Comm: kworker/u32:45 Not tainted 6.8.0-rc3-1.gae4495f-default #1 openSUSE Tumbleweed (unreleased) 493b6d5b382c603654d7a81fc3c144d59a1dfceb Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack_lvl+0x47/0x60 warn_alloc+0x13a/0x1b0 ? srso_alias_return_thunk+0x5/0xfbef5 ? __alloc_pages_direct_compact+0xab/0x210 __alloc_pages_slowpath.constprop.0+0xd3e/0xda0 __alloc_pages+0x32d/0x350 ? mhi_prepare_channel+0x127/0x2d0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] __kmalloc_large_node+0x72/0x110 __kmalloc+0x37c/0x480 ? mhi_map_single_no_bb+0x77/0xf0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] ? mhi_prepare_channel+0x127/0x2d0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] mhi_prepare_channel+0x127/0x2d0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] __mhi_prepare_for_transfer+0x44/0x80 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] ? __pfx_____mhi_prepare_for_transfer+0x10/0x10 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] device_for_each_child+0x5c/0xa0 ? __pfx_pci_pm_resume+0x10/0x10 ath11k_core_resume+0x65/0x100 [ath11k a5094e22d7223135c40d93c8f5321cf09fd85e4e] ? srso_alias_return_thunk+0x5/0xfbef5 ath11k_pci_pm_resume+0x32/0x60 [ath11k_pci 830b7bfc3ea80ebef32e563cafe2cb55e9cc73ec] ? srso_alias_return_thunk+0x5/0xfbef5 dpm_run_callback+0x8c/0x1e0 device_resume+0x104/0x340 ? __pfx_dpm_watchdog_handler+0x10/0x10 async_resume+0x1d/0x30 async_run_entry_fn+0x32/0x120 process_one_work+0x168/0x330 worker_thread+0x2f5/0x410 ? __pfx_worker_thread+0x10/0x10 kthread+0xe8/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Actually those buffers are used only by QMI target -> host communication. And for WCN6855 and QCA6390, the largest packet size for that is less than 6KB. So change buf_len field to 8KB, which results in order 1 allocation if page size is 4KB. In this way, we can at least save some memory, and as well as decrease the possibility of allocation failure in those scenarios. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.30 Reported-by: Vlastimil Babka Closes: https://lore.kernel.org/ath11k/96481a45-3547-4d23-ad34-3a8f1d90c1cd@suse.cz/ Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240223053111.29170-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index a62ee05c54097..4bea36cc71085 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -105,7 +105,7 @@ static struct mhi_controller_config ath11k_mhi_config_qca6390 = { .max_channels = 128, .timeout_ms = 2000, .use_bounce_buf = false, - .buf_len = 0, + .buf_len = 8192, .num_channels = ARRAY_SIZE(ath11k_mhi_channels_qca6390), .ch_cfg = ath11k_mhi_channels_qca6390, .num_events = ARRAY_SIZE(ath11k_mhi_events_qca6390), -- GitLab From 50bd749c60ceb5180f5bf9e37782b5afa04169b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 Feb 2024 13:42:07 +0530 Subject: [PATCH 1615/2290] cpufreq: Don't unregister cpufreq cooling on CPU hotplug [ Upstream commit c4d61a529db788d2e52654f5b02c8d1de4952c5b ] Offlining a CPU and bringing it back online is a common operation and it happens frequently during system suspend/resume, where the non-boot CPUs are hotplugged out during suspend and brought back at resume. The cpufreq core already tries to make this path as fast as possible as the changes are only temporary in nature and full cleanup of resources isn't required in this case. For example the drivers can implement online()/offline() callbacks to avoid a lot of tear down of resources. On similar lines, there is no need to unregister the cpufreq cooling device during suspend / resume, but only while the policy is getting removed. Moreover, unregistering the cpufreq cooling device is resulting in an unwanted outcome, where the system suspend is eventually aborted in the process. Currently, during system suspend the cpufreq core unregisters the cooling device, which in turn removes a kobject using device_del() and that generates a notification to the userspace via uevent broadcast. This causes system suspend to abort in some setups. This was also earlier reported (indirectly) by Roman [1]. Maybe there is another way around to fixing that problem properly, but this change makes sense anyways. Move the registering and unregistering of the cooling device to policy creation and removal times onlyy. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218521 Reported-by: Manaf Meethalavalappu Pallikunhi Reported-by: Roman Stratiienko Link: https://patchwork.kernel.org/project/linux-pm/patch/20220710164026.541466-1-r.stratiienko@gmail.com/ [1] Tested-by: Manaf Meethalavalappu Pallikunhi Signed-off-by: Viresh Kumar Reviewed-by: Dhruva Gole Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c8912756fc06d..91efa23e0e8f3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1525,7 +1525,8 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); - if (cpufreq_thermal_control_enabled(cpufreq_driver)) + /* Register cpufreq cooling only for a new policy */ + if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); pr_debug("initialization complete\n"); @@ -1609,11 +1610,6 @@ static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) else policy->last_policy = policy->policy; - if (cpufreq_thermal_control_enabled(cpufreq_driver)) { - cpufreq_cooling_unregister(policy->cdev); - policy->cdev = NULL; - } - if (has_target()) cpufreq_exit_governor(policy); @@ -1674,6 +1670,15 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return; } + /* + * Unregister cpufreq cooling once all the CPUs of the policy are + * removed. + */ + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } + /* We did light-weight exit earlier, do full tear down now */ if (cpufreq_driver->offline) cpufreq_driver->exit(policy); -- GitLab From 36c2a2863bc3896243eb724dc3fd4cf9aea633f2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 23 Jan 2024 23:42:29 +0100 Subject: [PATCH 1616/2290] btrfs: handle chunk tree lookup error in btrfs_relocate_sys_chunks() [ Upstream commit 7411055db5ce64f836aaffd422396af0075fdc99 ] The unhandled case in btrfs_relocate_sys_chunks() loop is a corruption, as it could be caused only by two impossible conditions: - at first the search key is set up to look for a chunk tree item, with offset -1, this is an inexact search and the key->offset will contain the correct offset upon a successful search, a valid chunk tree item cannot have an offset -1 - after first successful search, the found_key corresponds to a chunk item, the offset is decremented by 1 before the next loop, it's impossible to find a chunk item there due to alignment and size constraints Reviewed-by: Josef Bacik Reviewed-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 03cfb425ea4ea..ab5d410d560e7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3381,7 +3381,17 @@ again: mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } - BUG_ON(ret == 0); /* Corruption */ + if (ret == 0) { + /* + * On the first search we would find chunk tree with + * offset -1, which is not possible. On subsequent + * loops this would find an existing item on an invalid + * offset (one less than the previous one, wrong + * alignment and size). + */ + ret = -EUCLEAN; + goto error; + } ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); -- GitLab From 0f30f95b918eb63c4ba6c687d843683d3850bf3a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 19 Jan 2024 21:19:18 +0100 Subject: [PATCH 1617/2290] btrfs: export: handle invalid inode or root reference in btrfs_get_parent() [ Upstream commit 26b66d1d366a375745755ca7365f67110bbf6bd5 ] The get_parent handler looks up a parent of a given dentry, this can be either a subvolume or a directory. The search is set up with offset -1 but it's never expected to find such item, as it would break allowed range of inode number or a root id. This means it's a corruption (ext4 also returns this error code). Reviewed-by: Josef Bacik Reviewed-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/export.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index fab7eb76e53b2..58b0f04d7123f 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -161,8 +161,15 @@ struct dentry *btrfs_get_parent(struct dentry *child) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto fail; + if (ret == 0) { + /* + * Key with offset of -1 found, there would have to exist an + * inode with such number or a root with such id. + */ + ret = -EUCLEAN; + goto fail; + } - BUG_ON(ret == 0); /* Key with offset of -1 found */ if (path->slots[0] == 0) { ret = -ENOENT; goto fail; -- GitLab From 9ae356c627b493323e1433dcb27a26917668c07c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 6 Feb 2024 22:47:13 +0100 Subject: [PATCH 1618/2290] btrfs: send: handle path ref underflow in header iterate_inode_ref() [ Upstream commit 3c6ee34c6f9cd12802326da26631232a61743501 ] Change BUG_ON to proper error handling if building the path buffer fails. The pointers are not printed so we don't accidentally leak kernel addresses. Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/send.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9f7ffd9ef6fd7..754a9fb0165fa 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1015,7 +1015,15 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, ret = PTR_ERR(start); goto out; } - BUG_ON(start < p->buf); + if (unlikely(start < p->buf)) { + btrfs_err(root->fs_info, + "send: path ref buffer underflow for key (%llu %u %llu)", + found_key->objectid, + found_key->type, + found_key->offset); + ret = -EINVAL; + goto out; + } } p->start = start; } else { -- GitLab From 5693dd6d3d01f0eea24401f815c98b64cb315b67 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 16 Feb 2024 14:06:37 -0800 Subject: [PATCH 1619/2290] ice: use relative VSI index for VFs instead of PF VSI number [ Upstream commit 11fbb1bfb5bc8c98b2d7db9da332b5e568f4aaab ] When initializing over virtchnl, the PF is required to pass a VSI ID to the VF as part of its capabilities exchange. The VF driver reports this value back to the PF in a variety of commands. The PF driver validates that this value matches the value it sent to the VF. Some hardware families such as the E700 series could use this value when reading RSS registers or communicating directly with firmware over the Admin Queue. However, E800 series hardware does not support any of these interfaces and the VF's only use for this value is to report it back to the PF. Thus, there is no requirement that this value be an actual VSI ID value of any kind. The PF driver already does not trust that the VF sends it a real VSI ID. The VSI structure is always looked up from the VF structure. The PF does validate that the VSI ID provided matches a VSI associated with the VF, but otherwise does not use the VSI ID for any purpose. Instead of reporting the VSI number relative to the PF space, report a fixed value of 1. When communicating with the VF over virtchnl, validate that the VSI number is returned appropriately. This avoids leaking information about the firmware of the PF state. Currently the ice driver only supplies a VF with a single VSI. However, it appears that virtchnl has some support for allowing multiple VSIs. I did not attempt to implement this. However, space is left open to allow further relative indexes if additional VSIs are provided in future feature development. For this reason, keep the ice_vc_isvalid_vsi_id function in place to allow extending it for multiple VSIs in the future. This change will also simplify handling of live migration in a future series. Since we no longer will provide a real VSI number to the VF, there will be no need to keep track of this number when migrating to a new host. Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 ++------- drivers/net/ethernet/intel/ice/ice_virtchnl.h | 9 +++++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 4b71392f60df1..e64bef490a174 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -493,7 +493,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vfres->max_mtu = ice_vc_get_max_frame_size(vf); - vfres->vsi_res[0].vsi_id = vf->lan_vsi_num; + vfres->vsi_res[0].vsi_id = ICE_VF_VSI_ID; vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; vfres->vsi_res[0].num_queue_pairs = vsi->num_txq; ether_addr_copy(vfres->vsi_res[0].default_mac_addr, @@ -539,12 +539,7 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf) */ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) { - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = ice_find_vsi(pf, vsi_id); - - return (vsi && (vsi->vf == vf)); + return vsi_id == ICE_VF_VSI_ID; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index b5a3fd8adbb4e..6073d3b2d2d65 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -18,6 +18,15 @@ */ #define ICE_MAX_MACADDR_PER_VF 18 +/* VFs only get a single VSI. For ice hardware, the VF does not need to know + * its VSI index. However, the virtchnl interface requires a VSI number, + * mainly due to legacy hardware. + * + * Since the VF doesn't need this information, report a static value to the VF + * instead of leaking any information about the PF or hardware setup. + */ +#define ICE_VF_VSI_ID 1 + struct ice_virtchnl_ops { int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg); -- GitLab From d7ee3bf0caf599c14db0bf4af7aacd6206ef8a23 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 Mar 2024 10:07:44 +0000 Subject: [PATCH 1620/2290] net/smc: reduce rtnl pressure in smc_pnet_create_pnetids_list() [ Upstream commit 00af2aa93b76b1bade471ad0d0525d4d29ca5cc0 ] Many syzbot reports show extreme rtnl pressure, and many of them hint that smc acquires rtnl in netns creation for no good reason [1] This patch returns early from smc_pnet_net_init() if there is no netdevice yet. I am not even sure why smc_pnet_create_pnetids_list() even exists, because smc_pnet_netdev_event() is also calling smc_pnet_add_base_pnetid() when handling NETDEV_UP event. [1] extract of typical syzbot reports 2 locks held by syz-executor.3/12252: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.4/12253: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.1/12257: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.2/12261: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.0/12265: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.3/12268: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.4/12271: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.1/12274: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.2/12280: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 Signed-off-by: Eric Dumazet Cc: Wenjia Zhang Cc: Jan Karcher Cc: "D. Wythe" Cc: Tony Lu Cc: Wen Gu Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20240302100744.3868021-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/smc/smc_pnet.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 25fb2fd186e22..21b8bf23e4ee6 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -802,6 +802,16 @@ static void smc_pnet_create_pnetids_list(struct net *net) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; + /* Newly created netns do not have devices. + * Do not even acquire rtnl. + */ + if (list_empty(&net->dev_base_head)) + return; + + /* Note: This might not be needed, because smc_pnet_netdev_event() + * is also calling smc_pnet_add_base_pnetid() when handling + * NETDEV_UP event. + */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); -- GitLab From b19fe5eea619d54eea59bb8a37c0f8d00ef0e912 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Thu, 18 Jan 2024 12:40:34 +0800 Subject: [PATCH 1621/2290] Bluetooth: btintel: Fix null ptr deref in btintel_read_version [ Upstream commit b79e040910101b020931ba0c9a6b77e81ab7f645 ] If hci_cmd_sync_complete() is triggered and skb is NULL, then hdev->req_skb is NULL, which will cause this issue. Reported-and-tested-by: syzbot+830d9e3fa61968246abd@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btintel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index bbad1207cdfd8..c77c06b84d86c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -405,7 +405,7 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver) struct sk_buff *skb; skb = __hci_cmd_sync(hdev, 0xfc05, 0, NULL, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { + if (IS_ERR_OR_NULL(skb)) { bt_dev_err(hdev, "Reading Intel version information failed (%ld)", PTR_ERR(skb)); return PTR_ERR(skb); -- GitLab From a83a7728e4871b69e40ccc255e075700820c87bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Feb 2024 11:29:14 +0100 Subject: [PATCH 1622/2290] Bluetooth: btmtk: Add MODULE_FIRMWARE() for MT7922 [ Upstream commit 3e465a07cdf444140f16bc57025c23fcafdde997 ] Since dracut refers to the module info for defining the required firmware files and btmtk driver doesn't provide the firmware info for MT7922, the generate initrd misses the firmware, resulting in the broken Bluetooth. This patch simply adds the MODULE_FIRMWARE() for the missing entry for covering that. Link: https://bugzilla.suse.com/show_bug.cgi?id=1214133 Signed-off-by: Takashi Iwai Reviewed-by: Paul Menzel Reviewed-by: Matthias Brugger Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btmtk.c | 1 + drivers/bluetooth/btmtk.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 809762d64fc65..b77e337778a44 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -288,4 +288,5 @@ MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_MT7622); MODULE_FIRMWARE(FIRMWARE_MT7663); MODULE_FIRMWARE(FIRMWARE_MT7668); +MODULE_FIRMWARE(FIRMWARE_MT7922); MODULE_FIRMWARE(FIRMWARE_MT7961); diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index 2a88ea8e475e8..ee0b1d27aa5c0 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -4,6 +4,7 @@ #define FIRMWARE_MT7622 "mediatek/mt7622pr2h.bin" #define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin" #define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin" +#define FIRMWARE_MT7922 "mediatek/BT_RAM_CODE_MT7922_1_1_hdr.bin" #define FIRMWARE_MT7961 "mediatek/BT_RAM_CODE_MT7961_1_2_hdr.bin" #define HCI_EV_WMT 0xe4 -- GitLab From 48bfb4b03c5ff6e1fa1dc73fb915e150b0968c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 5 Jan 2024 14:58:36 -0300 Subject: [PATCH 1623/2290] drm/vc4: don't check if plane->state->fb == state->fb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5ee0d47dcf33efd8950b347dcf4d20bab12a3fa9 ] Currently, when using non-blocking commits, we can see the following kernel warning: [ 110.908514] ------------[ cut here ]------------ [ 110.908529] refcount_t: underflow; use-after-free. [ 110.908620] WARNING: CPU: 0 PID: 1866 at lib/refcount.c:87 refcount_dec_not_one+0xb8/0xc0 [ 110.908664] Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device cmac algif_hash aes_arm64 aes_generic algif_skcipher af_alg bnep hid_logitech_hidpp vc4 brcmfmac hci_uart btbcm brcmutil bluetooth snd_soc_hdmi_codec cfg80211 cec drm_display_helper drm_dma_helper drm_kms_helper snd_soc_core snd_compress snd_pcm_dmaengine fb_sys_fops sysimgblt syscopyarea sysfillrect raspberrypi_hwmon ecdh_generic ecc rfkill libaes i2c_bcm2835 binfmt_misc joydev snd_bcm2835(C) bcm2835_codec(C) bcm2835_isp(C) v4l2_mem2mem videobuf2_dma_contig snd_pcm bcm2835_v4l2(C) raspberrypi_gpiomem bcm2835_mmal_vchiq(C) videobuf2_v4l2 snd_timer videobuf2_vmalloc videobuf2_memops videobuf2_common snd videodev vc_sm_cma(C) mc hid_logitech_dj uio_pdrv_genirq uio i2c_dev drm fuse dm_mod drm_panel_orientation_quirks backlight ip_tables x_tables ipv6 [ 110.909086] CPU: 0 PID: 1866 Comm: kodi.bin Tainted: G C 6.1.66-v8+ #32 [ 110.909104] Hardware name: Raspberry Pi 3 Model B Rev 1.2 (DT) [ 110.909114] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 110.909132] pc : refcount_dec_not_one+0xb8/0xc0 [ 110.909152] lr : refcount_dec_not_one+0xb4/0xc0 [ 110.909170] sp : ffffffc00913b9c0 [ 110.909177] x29: ffffffc00913b9c0 x28: 000000556969bbb0 x27: 000000556990df60 [ 110.909205] x26: 0000000000000002 x25: 0000000000000004 x24: ffffff8004448480 [ 110.909230] x23: ffffff800570b500 x22: ffffff802e03a7bc x21: ffffffecfca68c78 [ 110.909257] x20: ffffff8002b42000 x19: ffffff802e03a600 x18: 0000000000000000 [ 110.909283] x17: 0000000000000011 x16: ffffffffffffffff x15: 0000000000000004 [ 110.909308] x14: 0000000000000fff x13: ffffffed577e47e0 x12: 0000000000000003 [ 110.909333] x11: 0000000000000000 x10: 0000000000000027 x9 : c912d0d083728c00 [ 110.909359] x8 : c912d0d083728c00 x7 : 65646e75203a745f x6 : 746e756f63666572 [ 110.909384] x5 : ffffffed579f62ee x4 : ffffffed579eb01e x3 : 0000000000000000 [ 110.909409] x2 : 0000000000000000 x1 : ffffffc00913b750 x0 : 0000000000000001 [ 110.909434] Call trace: [ 110.909441] refcount_dec_not_one+0xb8/0xc0 [ 110.909461] vc4_bo_dec_usecnt+0x4c/0x1b0 [vc4] [ 110.909903] vc4_cleanup_fb+0x44/0x50 [vc4] [ 110.910315] drm_atomic_helper_cleanup_planes+0x88/0xa4 [drm_kms_helper] [ 110.910669] vc4_atomic_commit_tail+0x390/0x9dc [vc4] [ 110.911079] commit_tail+0xb0/0x164 [drm_kms_helper] [ 110.911397] drm_atomic_helper_commit+0x1d0/0x1f0 [drm_kms_helper] [ 110.911716] drm_atomic_commit+0xb0/0xdc [drm] [ 110.912569] drm_mode_atomic_ioctl+0x348/0x4b8 [drm] [ 110.913330] drm_ioctl_kernel+0xec/0x15c [drm] [ 110.914091] drm_ioctl+0x24c/0x3b0 [drm] [ 110.914850] __arm64_sys_ioctl+0x9c/0xd4 [ 110.914873] invoke_syscall+0x4c/0x114 [ 110.914897] el0_svc_common+0xd0/0x118 [ 110.914917] do_el0_svc+0x38/0xd0 [ 110.914936] el0_svc+0x30/0x8c [ 110.914958] el0t_64_sync_handler+0x84/0xf0 [ 110.914979] el0t_64_sync+0x18c/0x190 [ 110.914996] ---[ end trace 0000000000000000 ]--- This happens because, although `prepare_fb` and `cleanup_fb` are perfectly balanced, we cannot guarantee consistency in the check plane->state->fb == state->fb. This means that sometimes we can increase the refcount in `prepare_fb` and don't decrease it in `cleanup_fb`. The opposite can also be true. In fact, the struct drm_plane .state shouldn't be accessed directly but instead, the `drm_atomic_get_new_plane_state()` helper function should be used. So, we could stick to this check, but using `drm_atomic_get_new_plane_state()`. But actually, this check is not really needed. We can increase and decrease the refcount symmetrically without problems. This is going to make the code more simple and consistent. Signed-off-by: Maíra Canal Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240105175908.242000-1-mcanal@igalia.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_plane.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index eb08020154f30..7e6648b277b25 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -1415,9 +1415,6 @@ static int vc4_prepare_fb(struct drm_plane *plane, drm_gem_plane_helper_prepare_fb(plane, state); - if (plane->state->fb == state->fb) - return 0; - return vc4_bo_inc_usecnt(bo); } @@ -1426,7 +1423,7 @@ static void vc4_cleanup_fb(struct drm_plane *plane, { struct vc4_bo *bo; - if (plane->state->fb == state->fb || !state->fb) + if (!state->fb) return; bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base); -- GitLab From ac1e0f080a768e085c35878397e7caeafc48b47d Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:37:59 -0800 Subject: [PATCH 1624/2290] Input: synaptics-rmi4 - fail probing if memory allocation for "phys" fails [ Upstream commit bc4996184d56cfaf56d3811ac2680c8a0e2af56e ] While input core can work with input->phys set to NULL userspace might depend on it, so better fail probing if allocation fails. The system must be in a pretty bad shape for it to happen anyway. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240117073124.143636-1-chentao@kylinos.cn Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/rmi4/rmi_driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d395c..aa32371f04af6 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -1196,7 +1196,11 @@ static int rmi_driver_probe(struct device *dev) } rmi_driver_set_input_params(rmi_dev, data->input); data->input->phys = devm_kasprintf(dev, GFP_KERNEL, - "%s/input0", dev_name(dev)); + "%s/input0", dev_name(dev)); + if (!data->input->phys) { + retval = -ENOMEM; + goto err; + } } retval = rmi_init_functions(data); -- GitLab From c87dd159189a582906ba6a7302d966ac56b80f13 Mon Sep 17 00:00:00 2001 From: Samuel Dionne-Riel Date: Thu, 21 Dec 2023 22:01:50 -0500 Subject: [PATCH 1625/2290] drm: panel-orientation-quirks: Add quirk for GPD Win Mini [ Upstream commit 2f862fdc0fd802e728b6ca96bc78ec3f01bf161e ] This adds a DMI orientation quirk for the GPD Win Mini panel. Signed-off-by: Samuel Dionne-Riel Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20231222030149.3740815-2-samuel@dionne-riel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index d5c15292ae937..3fe5e6439c401 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -117,6 +117,12 @@ static const struct drm_dmi_panel_orientation_data lcd1080x1920_leftside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1080x1920_rightside_up = { + .width = 1080, + .height = 1920, + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, +}; + static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = { .width = 1200, .height = 1920, @@ -279,6 +285,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1618-03") }, .driver_data = (void *)&lcd720x1280_rightside_up, + }, { /* GPD Win Mini */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1617-01") + }, + .driver_data = (void *)&lcd1080x1920_rightside_up, }, { /* I.T.Works TW891 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), -- GitLab From 5238e1c2bd3142c10fd41c16891244b20b928894 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Jan 2024 14:43:38 +0100 Subject: [PATCH 1626/2290] pinctrl: renesas: checker: Limit cfg reg enum checks to provided IDs [ Upstream commit 3803584a4e9b65bb5b013f862f55c5055aa86c25 ] If the number of provided enum IDs in a variable width config register description does not match the expected number, the checker uses the expected number for validating the individual enum IDs. However, this may cause out-of-bounds accesses on the array holding the enum IDs, leading to bogus enum_id conflict warnings. Worse, if the bug is an incorrect bit field description (e.g. accidentally using "12" instead of "-12" for a reserved field), thousands of warnings may be printed, overflowing the kernel log buffer. Fix this by limiting the enum ID check to the number of provided enum IDs. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/c7385f44f2faebb8856bcbb4e908d846fc1531fb.1705930809.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index c91102d3f1d15..1c7f8caf7f7cd 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -921,9 +921,11 @@ static void __init sh_pfc_check_cfg_reg(const char *drvname, sh_pfc_err("reg 0x%x: var_field_width declares %u instead of %u bits\n", cfg_reg->reg, rw, cfg_reg->reg_width); - if (n != cfg_reg->nr_enum_ids) + if (n != cfg_reg->nr_enum_ids) { sh_pfc_err("reg 0x%x: enum_ids[] has %u instead of %u values\n", cfg_reg->reg, cfg_reg->nr_enum_ids, n); + n = cfg_reg->nr_enum_ids; + } check_enum_ids: sh_pfc_check_reg_enums(drvname, cfg_reg->reg, cfg_reg->enum_ids, n); -- GitLab From 53cb1e52c9db618c08335984d1ca80db220ccf09 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 10 Apr 2023 21:04:50 +0900 Subject: [PATCH 1627/2290] sysv: don't call sb_bread() with pointers_lock held [ Upstream commit f123dc86388cb669c3d6322702dc441abc35c31e ] syzbot is reporting sleep in atomic context in SysV filesystem [1], for sb_bread() is called with rw_spinlock held. A "write_lock(&pointers_lock) => read_lock(&pointers_lock) deadlock" bug and a "sb_bread() with write_lock(&pointers_lock)" bug were introduced by "Replace BKL for chain locking with sysvfs-private rwlock" in Linux 2.5.12. Then, "[PATCH] err1-40: sysvfs locking fix" in Linux 2.6.8 fixed the former bug by moving pointers_lock lock to the callers, but instead introduced a "sb_bread() with read_lock(&pointers_lock)" bug (which made this problem easier to hit). Al Viro suggested that why not to do like get_branch()/get_block()/ find_shared() in Minix filesystem does. And doing like that is almost a revert of "[PATCH] err1-40: sysvfs locking fix" except that get_branch() from with find_shared() is called without write_lock(&pointers_lock). Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=69b40dc5fd40f32c199f Suggested-by: Al Viro Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/0d195f93-a22a-49a2-0020-103534d6f7f6@I-love.SAKURA.ne.jp Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/sysv/itree.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 9925cfe571595..17c7d76770a0a 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -82,9 +82,6 @@ static inline sysv_zone_t *block_end(struct buffer_head *bh) return (sysv_zone_t*)((char*)bh->b_data + bh->b_size); } -/* - * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock) - */ static Indirect *get_branch(struct inode *inode, int depth, int offsets[], @@ -104,15 +101,18 @@ static Indirect *get_branch(struct inode *inode, bh = sb_bread(sb, block); if (!bh) goto failure; + read_lock(&pointers_lock); if (!verify_chain(chain, p)) goto changed; add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets); + read_unlock(&pointers_lock); if (!p->key) goto no_block; } return NULL; changed: + read_unlock(&pointers_lock); brelse(bh); *err = -EAGAIN; goto no_block; @@ -218,9 +218,7 @@ static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *b goto out; reread: - read_lock(&pointers_lock); partial = get_branch(inode, depth, offsets, chain, &err); - read_unlock(&pointers_lock); /* Simplest case - block found, no allocation needed */ if (!partial) { @@ -290,9 +288,9 @@ static Indirect *find_shared(struct inode *inode, *top = 0; for (k = depth; k > 1 && !offsets[k-1]; k--) ; + partial = get_branch(inode, k, offsets, chain, &err); write_lock(&pointers_lock); - partial = get_branch(inode, k, offsets, chain, &err); if (!partial) partial = chain + k-1; /* -- GitLab From ee0b5f96b6d66a1e6698228dcb41df11ec7f352f Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Wed, 31 Jan 2024 10:50:57 -0800 Subject: [PATCH 1628/2290] scsi: lpfc: Fix possible memory leak in lpfc_rcv_padisc() [ Upstream commit 2ae917d4bcab80ab304b774d492e2fcd6c52c06b ] The call to lpfc_sli4_resume_rpi() in lpfc_rcv_padisc() may return an unsuccessful status. In such cases, the elsiocb is not issued, the completion is not called, and thus the elsiocb resource is leaked. Check return value after calling lpfc_sli4_resume_rpi() and conditionally release the elsiocb resource. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240131185112.149731-3-justintee8345@gmail.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nportdisc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index b86ff9fcdf0c6..f21396a0ba9d0 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -748,8 +748,10 @@ lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Save the ELS cmd */ elsiocb->drvrTimeout = cmd; - lpfc_sli4_resume_rpi(ndlp, - lpfc_mbx_cmpl_resume_rpi, elsiocb); + if (lpfc_sli4_resume_rpi(ndlp, + lpfc_mbx_cmpl_resume_rpi, + elsiocb)) + kfree(elsiocb); goto out; } } -- GitLab From 13701b0f01b6524cfd405bca7ed7587859fd57fa Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Wed, 7 Feb 2024 19:21:32 -0700 Subject: [PATCH 1629/2290] isofs: handle CDs with bad root inode but good Joliet root directory [ Upstream commit 4243bf80c79211a8ca2795401add9c4a3b1d37ca ] I have a CD copy of the original Tom Clancy's Ghost Recon game from 2001. The disc mounts without error on Windows, but on Linux mounting fails with the message "isofs_fill_super: get root inode failed". The error originates in isofs_read_inode, which returns -EIO because de_len is 0. The superblock on this disc appears to be intentionally corrupt as a form of copy protection. When the root inode is unusable, instead of giving up immediately, try to continue with the Joliet file table. This fixes the Ghost Recon CD and probably other copy-protected CDs too. Signed-off-by: Alex Henrie Signed-off-by: Jan Kara Message-Id: <20240208022134.451490-1-alexhenrie24@gmail.com> Signed-off-by: Sasha Levin --- fs/isofs/inode.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index df9d70588b600..8a6c7fdc1d5fc 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -908,8 +908,22 @@ root_found: * we then decide whether to use the Joliet descriptor. */ inode = isofs_iget(s, sbi->s_firstdatazone, 0); - if (IS_ERR(inode)) - goto out_no_root; + + /* + * Fix for broken CDs with a corrupt root inode but a correct Joliet + * root directory. + */ + if (IS_ERR(inode)) { + if (joliet_level && sbi->s_firstdatazone != first_data_zone) { + printk(KERN_NOTICE + "ISOFS: root inode is unusable. " + "Disabling Rock Ridge and switching to Joliet."); + sbi->s_rock = 0; + inode = NULL; + } else { + goto out_no_root; + } + } /* * Fix for broken CDs with Rock Ridge and empty ISO root directory but -- GitLab From eae948ecd5f019b65e78ed9e6f75945cf4e793e3 Mon Sep 17 00:00:00 2001 From: mosomate Date: Thu, 8 Feb 2024 10:55:40 -0600 Subject: [PATCH 1630/2290] ASoC: Intel: common: DMI remap for rebranded Intel NUC M15 (LAPRC710) laptops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c13e03126a5be90781084437689724254c8226e1 ] Added DMI quirk to handle the rebranded variants of Intel NUC M15 (LAPRC710) laptops. The DMI matching is based on motherboard attributes. Link: https://github.com/thesofproject/linux/issues/4218 Signed-off-by: Máté Mosonyi Reviewed-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240208165545.93811-20-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/soundwire/dmi-quirks.c | 8 ++++++++ sound/soc/intel/boards/sof_sdw.c | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index 9ebdd0cd0b1cf..91ab97a456fa9 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -130,6 +130,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { }, .driver_data = (void *)intel_rooks_county, }, + { + /* quirk used for NUC15 LAPRC710 skew */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "LAPRC710"), + }, + .driver_data = (void *)intel_rooks_county, + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 985012f2003e2..d1e6e4208c376 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -224,6 +224,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_SDW_PCH_DMIC | RT711_JD2_100K), }, + { + /* NUC15 LAPRC710 skews */ + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "LAPRC710"), + }, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | + RT711_JD2_100K), + }, /* TigerLake-SDCA devices */ { .callback = sof_sdw_quirk_cb, -- GitLab From 3f3c1e735d3e9169df46f724eeb41a0c0e458051 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Dec 2023 09:33:29 -0800 Subject: [PATCH 1631/2290] rcu-tasks: Repair RCU Tasks Trace quiescence check [ Upstream commit 2eb52fa8900e642b3b5054c4bf9776089d2a935f ] The context-switch-time check for RCU Tasks Trace quiescence expects current->trc_reader_special.b.need_qs to be zero, and if so, updates it to TRC_NEED_QS_CHECKED. This is backwards, because if this value is zero, there is no RCU Tasks Trace grace period in flight, an thus no need for a quiescent state. Instead, when a grace period starts, this field is set to TRC_NEED_QS. This commit therefore changes the check from zero to TRC_NEED_QS. Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Tested-by: Steven Rostedt (Google) Signed-off-by: Boqun Feng Signed-off-by: Sasha Levin --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 319698087d66a..6858cae98da9e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -205,9 +205,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ - if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ -- GitLab From 2e2177f94c0e0bc41323d7b6975a5f4820ed347e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 14 Feb 2024 15:57:53 -0500 Subject: [PATCH 1632/2290] Julia Lawall reported this null pointer dereference, this should fix it. [ Upstream commit 9bf93dcfc453fae192fe5d7874b89699e8f800ac ] Signed-off-by: Mike Marshall Signed-off-by: Sasha Levin --- fs/orangefs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 5254256a224d7..4ca8ed410c3cf 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -527,7 +527,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); if (!ORANGEFS_SB(sb)) { d = ERR_PTR(-ENOMEM); - goto free_sb_and_op; + goto free_op; } ret = orangefs_fill_sb(sb, -- GitLab From 84ed33a08218582ecda3c82d93d1efa9aadf7770 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:54:47 +0100 Subject: [PATCH 1633/2290] media: sta2x11: fix irq handler cast [ Upstream commit 3de49ae81c3a0f83a554ecbce4c08e019f30168e ] clang-16 warns about casting incompatible function pointers: drivers/media/pci/sta2x11/sta2x11_vip.c:1057:6: error: cast from 'irqreturn_t (*)(int, struct sta2x11_vip *)' (aka 'enum irqreturn (*)(int, struct sta2x11_vip *)') to 'irq_handler_t' (aka 'enum irqreturn (*)(int, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] Change the prototype of the irq handler to the regular version with a local variable to adjust the argument type. Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil [hverkuil: update argument documentation] Signed-off-by: Sasha Levin --- drivers/media/pci/sta2x11/sta2x11_vip.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c index 8535e49a4c4f9..1f7ab56de4a00 100644 --- a/drivers/media/pci/sta2x11/sta2x11_vip.c +++ b/drivers/media/pci/sta2x11/sta2x11_vip.c @@ -756,7 +756,7 @@ static const struct video_device video_dev_template = { /** * vip_irq - interrupt routine * @irq: Number of interrupt ( not used, correct number is assumed ) - * @vip: local data structure containing all information + * @data: local data structure containing all information * * check for both frame interrupts set ( top and bottom ). * check FIFO overflow, but limit number of log messages after open. @@ -766,8 +766,9 @@ static const struct video_device video_dev_template = { * * IRQ_HANDLED, interrupt done. */ -static irqreturn_t vip_irq(int irq, struct sta2x11_vip *vip) +static irqreturn_t vip_irq(int irq, void *data) { + struct sta2x11_vip *vip = data; unsigned int status; status = reg_read(vip, DVP_ITS); @@ -1049,9 +1050,7 @@ static int sta2x11_vip_init_one(struct pci_dev *pdev, spin_lock_init(&vip->slock); - ret = request_irq(pdev->irq, - (irq_handler_t) vip_irq, - IRQF_SHARED, KBUILD_MODNAME, vip); + ret = request_irq(pdev->irq, vip_irq, IRQF_SHARED, KBUILD_MODNAME, vip); if (ret) { dev_err(&pdev->dev, "request_irq failed\n"); ret = -ENODEV; -- GitLab From b75395ec4b7a63840cd7c7071158f70d508a5965 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 18 Feb 2024 16:41:27 +0900 Subject: [PATCH 1634/2290] ALSA: firewire-lib: handle quirk to calculate payload quadlets as data block counter [ Upstream commit 4a486439d2ca85752c46711f373b6ddc107bb35d ] Miglia Harmony Audio (OXFW970) has a quirk to put the number of accumulated quadlets in CIP payload into the dbc field of CIP header. This commit handles the quirk in the packet processing layer. Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20240218074128.95210-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/amdtp-stream.c | 12 ++++++++---- sound/firewire/amdtp-stream.h | 4 ++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index f8b644cb9157a..8753125683692 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -771,10 +771,14 @@ static int check_cip_header(struct amdtp_stream *s, const __be32 *buf, } else { unsigned int dbc_interval; - if (*data_blocks > 0 && s->ctx_data.tx.dbc_interval > 0) - dbc_interval = s->ctx_data.tx.dbc_interval; - else - dbc_interval = *data_blocks; + if (!(s->flags & CIP_DBC_IS_PAYLOAD_QUADLETS)) { + if (*data_blocks > 0 && s->ctx_data.tx.dbc_interval > 0) + dbc_interval = s->ctx_data.tx.dbc_interval; + else + dbc_interval = *data_blocks; + } else { + dbc_interval = payload_length / sizeof(__be32); + } lost = dbc != ((*data_block_counter + dbc_interval) & 0xff); } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 1f957c946c956..cf9ab347277f2 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -37,6 +37,9 @@ * the value of current SYT_INTERVAL; e.g. initial value is not zero. * @CIP_UNAWARE_SYT: For outgoing packet, the value in SYT field of CIP is 0xffff. * For incoming packet, the value in SYT field of CIP is not handled. + * @CIP_DBC_IS_PAYLOAD_QUADLETS: Available for incoming packet, and only effective with + * CIP_DBC_IS_END_EVENT flag. The value of dbc field is the number of accumulated quadlets + * in CIP payload, instead of the number of accumulated data blocks. */ enum cip_flags { CIP_NONBLOCKING = 0x00, @@ -51,6 +54,7 @@ enum cip_flags { CIP_NO_HEADER = 0x100, CIP_UNALIGHED_DBC = 0x200, CIP_UNAWARE_SYT = 0x400, + CIP_DBC_IS_PAYLOAD_QUADLETS = 0x800, }; /** -- GitLab From baba35106855929782841b4126669b890f9cbdcb Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 19 Jan 2024 14:11:54 +0800 Subject: [PATCH 1635/2290] ext4: add a hint for block bitmap corrupt state in mb_groups [ Upstream commit 68ee261fb15457ecb17e3683cb4e6a4792ca5b71 ] If one group is marked as block bitmap corrupted, its free blocks cannot be used and its free count is also deducted from the global sbi->s_freeclusters_counter. User might be confused about the absent free space because we can't query the information about corrupted block groups except unreliable error messages in syslog. So add a hint to show block bitmap corrupted groups in mb_groups. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240119061154.1525781-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bc0ca45a5d817..a843f964332c2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2905,7 +2905,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); - seq_puts(seq, " ]\n"); + seq_puts(seq, " ]"); + if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + seq_puts(seq, " Block bitmap corrupted!"); + seq_puts(seq, "\n"); return 0; } -- GitLab From aba664845af3dab8280338e1ed9751a58c9e5130 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 19 Jan 2024 14:29:08 +0800 Subject: [PATCH 1636/2290] ext4: forbid commit inconsistent quota data when errors=remount-ro [ Upstream commit d8b945fa475f13d787df00c26a6dc45a3e2e1d1d ] There's issue as follows When do IO fault injection test: Quota error (device dm-3): find_block_dqentry: Quota for id 101 referenced but not present Quota error (device dm-3): qtree_read_dquot: Can't read quota structure for id 101 Quota error (device dm-3): do_check_range: Getting block 2021161007 out of range 1-186 Quota error (device dm-3): qtree_read_dquot: Can't read quota structure for id 661 Now, ext4_write_dquot()/ext4_acquire_dquot()/ext4_release_dquot() may commit inconsistent quota data even if process failed. This may lead to filesystem corruption. To ensure filesystem consistent when errors=remount-ro there is need to call ext4_handle_error() to abort journal. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240119062908.3598806-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 601e097e17207..274542d869d0c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6751,6 +6751,10 @@ static int ext4_write_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to commit dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6767,6 +6771,10 @@ static int ext4_acquire_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to acquire dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6786,6 +6794,10 @@ static int ext4_release_dquot(struct dquot *dquot) return PTR_ERR(handle); } ret = dquot_release(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to release dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; -- GitLab From 354a5d7bb740781079e79cab692a94dd9dd38f36 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 29 Aug 2019 11:53:52 -0400 Subject: [PATCH 1637/2290] drm/amd/display: Fix nanosec stat overflow [ Upstream commit 14d68acfd04b39f34eea7bea65dda652e6db5bf6 ] [Why] Nanosec stats can overflow on long running systems potentially causing statistic logging issues. [How] Use 64bit types for nanosec stats to ensure no overflow. Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/modules/inc/mod_stats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h index 4220fd8fdd60c..54cd86060f4d6 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h @@ -57,10 +57,10 @@ void mod_stats_update_event(struct mod_stats *mod_stats, unsigned int length); void mod_stats_update_flip(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns); + unsigned long long timestamp_in_ns); void mod_stats_update_vupdate(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns); + unsigned long long timestamp_in_ns); void mod_stats_update_freesync(struct mod_stats *mod_stats, unsigned int v_total_min, -- GitLab From 3a94feab045f51bdb3991a248f0a84f339f17a65 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 23 Feb 2024 17:08:16 +0530 Subject: [PATCH 1638/2290] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eb4f139888f636614dab3bcce97ff61cefc4b3a7 ] This ensures that the memory mapped by ioremap for adev->rmmio, is properly handled in amdgpu_device_init(). If the function exits early due to an error, the memory is unmapped. If the function completes successfully, the memory remains mapped. Reported by smatch: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 'adev->rmmio' from ioremap() not released on lines: 4035,4045,4051,4058,4068,4337 Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b11690a816e73..e4eb906806a51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3713,8 +3713,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) - return -ENOMEM; + if (!adev->reset_domain) { + r = -ENOMEM; + goto unmap_memory; + } /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -3722,18 +3724,18 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return r; + goto unmap_memory; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - return r; + goto unmap_memory; /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) - return r; + goto unmap_memory; /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -3743,7 +3745,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) - return r; + goto unmap_memory; } /* enable PCIE atomic ops */ @@ -3999,6 +4001,8 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); +unmap_memory: + iounmap(adev->rmmio); return r; } -- GitLab From 43be051f35f934a2f348ea7d83de70acee578f05 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Tue, 30 Jan 2024 11:38:25 -0800 Subject: [PATCH 1639/2290] SUNRPC: increase size of rpc_wait_queue.qlen from unsigned short to unsigned int [ Upstream commit 2c35f43b5a4b9cdfaa6fdd946f5a212615dac8eb ] When the NFS client is under extreme load the rpc_wait_queue.qlen counter can be overflowed. Here is an instant of the backlog queue overflow in a real world environment shown by drgn helper: rpc_task_stats(rpc_clnt): ------------------------- rpc_clnt: 0xffff92b65d2bae00 rpc_xprt: 0xffff9275db64f000 Queue: sending[64887] pending[524] backlog[30441] binding[0] XMIT task: 0xffff925c6b1d8e98 WRITE: 750654 __dta_call_status_580: 65463 __dta_call_transmit_status_579: 1 call_reserveresult: 685189 nfs_client_init_is_complete: 1 COMMIT: 584 call_reserveresult: 573 __dta_call_status_580: 11 ACCESS: 1 __dta_call_status_580: 1 GETATTR: 10 __dta_call_status_580: 4 call_reserveresult: 6 751249 tasks for server 111.222.333.444 Total tasks: 751249 count_rpc_wait_queues(xprt): ---------------------------- **** rpc_xprt: 0xffff9275db64f000 num_reqs: 65511 wait_queue: xprt_binding[0] cnt: 0 wait_queue: xprt_binding[1] cnt: 0 wait_queue: xprt_binding[2] cnt: 0 wait_queue: xprt_binding[3] cnt: 0 rpc_wait_queue[xprt_binding].qlen: 0 maxpriority: 0 wait_queue: xprt_sending[0] cnt: 0 wait_queue: xprt_sending[1] cnt: 64887 wait_queue: xprt_sending[2] cnt: 0 wait_queue: xprt_sending[3] cnt: 0 rpc_wait_queue[xprt_sending].qlen: 64887 maxpriority: 3 wait_queue: xprt_pending[0] cnt: 524 wait_queue: xprt_pending[1] cnt: 0 wait_queue: xprt_pending[2] cnt: 0 wait_queue: xprt_pending[3] cnt: 0 rpc_wait_queue[xprt_pending].qlen: 524 maxpriority: 0 wait_queue: xprt_backlog[0] cnt: 0 wait_queue: xprt_backlog[1] cnt: 685801 wait_queue: xprt_backlog[2] cnt: 0 wait_queue: xprt_backlog[3] cnt: 0 rpc_wait_queue[xprt_backlog].qlen: 30441 maxpriority: 3 [task cnt mismatch] There is no effect on operations when this overflow occurs. However it causes confusion when trying to diagnose the performance problem. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/sunrpc/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8ada7dc802d30..8f9bee0e21c3b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -186,7 +186,7 @@ struct rpc_wait_queue { unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; -- GitLab From aaefa79c1532cfe6bd3702a113ed3eba921094fe Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 28 Feb 2024 08:53:16 +0100 Subject: [PATCH 1640/2290] Revert "ACPI: PM: Block ASUS B1400CEAE from suspend to idle by default" [ Upstream commit cb98555fcd8eee98c30165537c7e394f3a66e809 ] This reverts commit d52848620de00cde4a3a5df908e231b8c8868250, which was originally put in place to work around a s2idle failure on this platform where the NVMe device was inaccessible upon resume. After extended testing, we found that the firmware's implementation of S3 is buggy and intermittently fails to wake up the system. We need to revert to s2idle mode. The NVMe issue has now been solved more precisely in the commit titled "PCI: Disable D3cold on Asus B1400 PCI-NVMe bridge" Link: https://bugzilla.kernel.org/show_bug.cgi?id=215742 Link: https://lore.kernel.org/r/20240228075316.7404-2-drake@endlessos.org Signed-off-by: Daniel Drake Signed-off-by: Bjorn Helgaas Acked-by: Jian-Hong Pan Acked-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/sleep.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 539c12fbd2f14..6026e20f022a2 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -385,18 +385,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, - /* - * ASUS B1400CEAE hangs on resume from suspend (see - * https://bugzilla.kernel.org/show_bug.cgi?id=215742). - */ - { - .callback = init_default_s3, - .ident = "ASUS B1400CEAE", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), - }, - }, {}, }; -- GitLab From b7d153bfba62f816c3bc0afa862b3e869f838d55 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 28 Feb 2024 23:07:57 -0800 Subject: [PATCH 1641/2290] libperf evlist: Avoid out-of-bounds access [ Upstream commit 1947b92464c3268381604bbe2ac977a3fd78192f ] Parallel testing appears to show a race between allocating and setting evsel ids. As there is a bounds check on the xyarray it yields a segv like: ``` AddressSanitizer:DEADLYSIGNAL ================================================================= ==484408==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000010 ==484408==The signal is caused by a WRITE memory access. ==484408==Hint: address points to the zero page. #0 0x55cef5d4eff4 in perf_evlist__id_hash tools/lib/perf/evlist.c:256 #1 0x55cef5d4f132 in perf_evlist__id_add tools/lib/perf/evlist.c:274 #2 0x55cef5d4f545 in perf_evlist__id_add_fd tools/lib/perf/evlist.c:315 #3 0x55cef5a1923f in store_evsel_ids util/evsel.c:3130 #4 0x55cef5a19400 in evsel__store_ids util/evsel.c:3147 #5 0x55cef5888204 in __run_perf_stat tools/perf/builtin-stat.c:832 #6 0x55cef5888c06 in run_perf_stat tools/perf/builtin-stat.c:960 #7 0x55cef58932db in cmd_stat tools/perf/builtin-stat.c:2878 ... ``` Avoid this crash by early exiting the perf_evlist__id_add_fd and perf_evlist__id_add is the access is out-of-bounds. Signed-off-by: Ian Rogers Cc: Yang Jihong Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240229070757.796244-1-irogers@google.com Signed-off-by: Sasha Levin --- tools/lib/perf/evlist.c | 18 ++++++++++++------ tools/lib/perf/include/internal/evlist.h | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 61b637f29b827..b871923c7e5cd 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -233,10 +233,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); + struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread); sid->id = id; sid->evsel = evsel; @@ -254,21 +254,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist) void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + if (!SID(evsel, cpu_map_idx, thread)) + return; + + perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id); evsel->id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) + int cpu_map_idx, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ u64 id; int ret; + if (!SID(evsel, cpu_map_idx, thread)) + return -1; + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); if (!ret) goto add; @@ -297,7 +303,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id); return 0; } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 850f07070036c..cf77db75291b9 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -127,11 +127,11 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist); void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id); + int cpu_map_idx, int thread, u64 id); int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd); + int cpu_map_idx, int thread, int fd); void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -- GitLab From c87e811cae7e1865046f64ce130e58f2af0a72c0 Mon Sep 17 00:00:00 2001 From: Markuss Broks Date: Fri, 1 Mar 2024 17:41:00 +0100 Subject: [PATCH 1642/2290] input/touchscreen: imagis: Correct the maximum touch area value [ Upstream commit 54a62ed17a705ef1ac80ebca2b62136b19243e19 ] As specified in downstream IST3038B driver and proved by testing, the correct maximum reported value of touch area is 16. Signed-off-by: Markuss Broks Signed-off-by: Karel Balej Link: https://lore.kernel.org/r/20240301164659.13240-2-karelb@gimli.ms.mff.cuni.cz Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/imagis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c index e2697e6c6d2a0..b667914a44f1d 100644 --- a/drivers/input/touchscreen/imagis.c +++ b/drivers/input/touchscreen/imagis.c @@ -210,7 +210,7 @@ static int imagis_init_input_dev(struct imagis_ts *ts) input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X); input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y); - input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 16, 0, 0); touchscreen_parse_properties(input_dev, true, &ts->prop); if (!ts->prop.max_x || !ts->prop.max_y) { -- GitLab From 512a01da7134bac8f8b373506011e8aaa3283854 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Tue, 5 Mar 2024 16:45:09 +0300 Subject: [PATCH 1643/2290] block: prevent division by zero in blk_rq_stat_sum() [ Upstream commit 93f52fbeaf4b676b21acfe42a5152620e6770d02 ] The expression dst->nr_samples + src->nr_samples may have zero value on overflow. It is necessary to add a check to avoid division by zero. Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240305134509.23108-1-r.smirnov@omp.ru Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-stat.c b/block/blk-stat.c index da9407b7d4abf..41be89ecaf20e 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -28,7 +28,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat) /* src is a per-cpu stat, mean isn't initialized */ void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { - if (!src->nr_samples) + if (dst->nr_samples + src->nr_samples <= dst->nr_samples) return; dst->min = min(dst->min, src->min); -- GitLab From 4097b1f10f044e97f71b960312d9894de0d0929b Mon Sep 17 00:00:00 2001 From: Manjunath Patil Date: Fri, 8 Mar 2024 22:33:23 -0800 Subject: [PATCH 1644/2290] RDMA/cm: add timeout to cm_destroy_id wait [ Upstream commit 96d9cbe2f2ff7abde021bac75eafaceabe9a51fa ] Add timeout to cm_destroy_id, so that userspace can trigger any data collection that would help in analyzing the cause of delay in destroying the cm_id. New noinline function helps dtrace/ebpf programs to hook on to it. Existing functionality isn't changed except triggering a probe-able new function at every timeout interval. We have seen cases where CM messages stuck with MAD layer (either due to software bug or faulty HCA), leading to cm_id getting stuck in the following call stack. This patch helps in resolving such issues faster. kernel: ... INFO: task XXXX:56778 blocked for more than 120 seconds. ... Call Trace: __schedule+0x2bc/0x895 schedule+0x36/0x7c schedule_timeout+0x1f6/0x31f ? __slab_free+0x19c/0x2ba wait_for_completion+0x12b/0x18a ? wake_up_q+0x80/0x73 cm_destroy_id+0x345/0x610 [ib_cm] ib_destroy_cm_id+0x10/0x20 [ib_cm] rdma_destroy_id+0xa8/0x300 [rdma_cm] ucma_destroy_id+0x13e/0x190 [rdma_ucm] ucma_write+0xe0/0x160 [rdma_ucm] __vfs_write+0x3a/0x16d vfs_write+0xb2/0x1a1 ? syscall_trace_enter+0x1ce/0x2b8 SyS_write+0x5c/0xd3 do_syscall_64+0x79/0x1b9 entry_SYSCALL_64_after_hwframe+0x16d/0x0 Signed-off-by: Manjunath Patil Link: https://lore.kernel.org/r/20240309063323.458102-1-manjunath.b.patil@oracle.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b7f9023442890..462a10d6a5762 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -34,6 +34,7 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */ static const char * const ibcm_rej_reason_strs[] = { [IB_CM_REJ_NO_QP] = "no QP", [IB_CM_REJ_NO_EEC] = "no EEC", @@ -1025,10 +1026,20 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__, + cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount)); +} + static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; + int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); @@ -1135,7 +1146,14 @@ retest: xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id)); cm_deref_id(cm_id_priv); - wait_for_completion(&cm_id_priv->comp); + do { + ret = wait_for_completion_timeout(&cm_id_priv->comp, + msecs_to_jiffies( + CM_DESTROY_ID_WAIT_TIMEOUT)); + if (!ret) /* timeout happened */ + cm_destroy_id_wait_timeout(cm_id); + } while (!ret); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); -- GitLab From 055e406d7b0eed9ab874fb2e62ab8141ffb9bf38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Sat, 9 Mar 2024 21:18:05 -0800 Subject: [PATCH 1645/2290] Input: imagis - use FIELD_GET where applicable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0ca3dbd03d66c6b9e044f48720e6ab5cef37ae5 ] Instead of manually extracting certain bits from registers with binary ANDs and shifts, the FIELD_GET macro can be used. With this in mind, the *_SHIFT macros can be dropped. Signed-off-by: Duje Mihanović Link: https://lore.kernel.org/r/20240306-b4-imagis-keys-v3-1-2c429afa8420@skole.hr Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/imagis.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c index b667914a44f1d..2636e1c9435d8 100644 --- a/drivers/input/touchscreen/imagis.c +++ b/drivers/input/touchscreen/imagis.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -23,12 +24,9 @@ #define IST3038C_I2C_RETRY_COUNT 3 #define IST3038C_MAX_FINGER_NUM 10 #define IST3038C_X_MASK GENMASK(23, 12) -#define IST3038C_X_SHIFT 12 #define IST3038C_Y_MASK GENMASK(11, 0) #define IST3038C_AREA_MASK GENMASK(27, 24) -#define IST3038C_AREA_SHIFT 24 #define IST3038C_FINGER_COUNT_MASK GENMASK(15, 12) -#define IST3038C_FINGER_COUNT_SHIFT 12 #define IST3038C_FINGER_STATUS_MASK GENMASK(9, 0) struct imagis_ts { @@ -92,8 +90,7 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id) goto out; } - finger_count = (intr_message & IST3038C_FINGER_COUNT_MASK) >> - IST3038C_FINGER_COUNT_SHIFT; + finger_count = FIELD_GET(IST3038C_FINGER_COUNT_MASK, intr_message); if (finger_count > IST3038C_MAX_FINGER_NUM) { dev_err(&ts->client->dev, "finger count %d is more than maximum supported\n", @@ -101,7 +98,7 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id) goto out; } - finger_pressed = intr_message & IST3038C_FINGER_STATUS_MASK; + finger_pressed = FIELD_GET(IST3038C_FINGER_STATUS_MASK, intr_message); for (i = 0; i < finger_count; i++) { error = imagis_i2c_read_reg(ts, @@ -118,12 +115,11 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id) input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, finger_pressed & BIT(i)); touchscreen_report_pos(ts->input_dev, &ts->prop, - (finger_status & IST3038C_X_MASK) >> - IST3038C_X_SHIFT, - finger_status & IST3038C_Y_MASK, 1); + FIELD_GET(IST3038C_X_MASK, finger_status), + FIELD_GET(IST3038C_Y_MASK, finger_status), + true); input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, - (finger_status & IST3038C_AREA_MASK) >> - IST3038C_AREA_SHIFT); + FIELD_GET(IST3038C_AREA_MASK, finger_status)); } input_mt_sync_frame(ts->input_dev); -- GitLab From 70310e55b52922afa4d9dfa4d60ba35602828455 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Sun, 10 Mar 2024 12:31:41 +0100 Subject: [PATCH 1646/2290] Input: allocate keycode for Display refresh rate toggle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cfeb98b95fff25c442f78a6f616c627bc48a26b7 ] Newer Lenovo Yogas and Legions with 60Hz/90Hz displays send a wmi event when Fn + R is pressed. This is intended for use to switch between the two refresh rates. Allocate a new KEY_REFRESH_RATE_TOGGLE keycode for it. Signed-off-by: Gergo Koteles Acked-by: Dmitry Torokhov Link: https://lore.kernel.org/r/15a5d08c84cf4d7b820de34ebbcf8ae2502fb3ca.1710065750.git.soyer@irl.hu Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 7ad931a329706..1ce8a91349e9f 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ -- GitLab From 44479c7fefc825f91b43ae024372e0511b105328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alban=20Boy=C3=A9?= Date: Tue, 27 Feb 2024 22:40:17 +0000 Subject: [PATCH 1647/2290] platform/x86: touchscreen_dmi: Add an extra entry for a variant of the Chuwi Vi8 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1266e2efb7512dbf20eac820ca2ed34de6b1c3e7 ] Signed-off-by: Alban Boyé Link: https://lore.kernel.org/r/20240227223919.11587-1-alban.boye@protonmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 11d72a3533552..399b97b54dd0f 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -1177,6 +1177,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "CHUWI.D86JLBNR"), }, }, + { + /* Chuwi Vi8 dual-boot (CWI506) */ + .driver_data = (void *)&chuwi_vi8_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "i86"), + DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"), + }, + }, { /* Chuwi Vi8 Plus (CWI519) */ .driver_data = (void *)&chuwi_vi8_plus_data, -- GitLab From 39da708cb201d899fa4027af30f59987c2244624 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 29 Jan 2024 16:36:25 +0530 Subject: [PATCH 1648/2290] perf/x86/amd/lbr: Discard erroneous branch entries [ Upstream commit 29297ffffb0bf388778bd4b581a43cee6929ae65 ] The Revision Guide for AMD Family 19h Model 10-1Fh processors declares Erratum 1452 which states that non-branch entries may erroneously be recorded in the Last Branch Record (LBR) stack with the valid and spec bits set. Such entries can be recognized by inspecting bit 61 of the corresponding LastBranchStackToIp register. This bit is currently reserved but if found to be set, the associated branch entry should be discarded. Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://bugzilla.kernel.org/attachment.cgi?id=305518 Link: https://lore.kernel.org/r/3ad2aa305f7396d41a40e3f054f740d464b16b7f.1706526029.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/lbr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index b8fe74e8e0a60..48f4095f500d4 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -173,9 +173,11 @@ void amd_pmu_lbr_read(void) /* * Check if a branch has been logged; if valid = 0, spec = 0 - * then no branch was recorded + * then no branch was recorded; if reserved = 1 then an + * erroneous branch was recorded (see Erratum 1452) */ - if (!entry.to.split.valid && !entry.to.split.spec) + if ((!entry.to.split.valid && !entry.to.split.spec) || + entry.to.split.reserved) continue; perf_clear_branch_entry_bitfields(br + out); -- GitLab From 5e7da5bb2d34b1f83d38fb69c5d044130a1136a0 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Fri, 15 Mar 2024 12:28:08 -0300 Subject: [PATCH 1649/2290] ktest: force $buildonly = 1 for 'make_warnings_file' test type [ Upstream commit 07283c1873a4d0eaa0e822536881bfdaea853910 ] The test type "make_warnings_file" should have no mandatory configuration parameters other than the ones required by the "build" test type, because its purpose is to create a file with build warnings that may or may not be used by other subsequent tests. Currently, the only way to use it as a stand-alone test is by setting POWER_CYCLE, CONSOLE, SSH_USER, BUILD_TARGET, TARGET_IMAGE, REBOOT_TYPE and GRUB_MENU. Link: https://lkml.kernel.org/r/20240315-ktest-v2-1-c5c20a75f6a3@marliere.net Cc: John Hawley Signed-off-by: Ricardo B. Marliere Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- tools/testing/ktest/ktest.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index e6c381498e632..449e45bd69665 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -836,6 +836,7 @@ sub set_value { if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ && $prvalue !~ /^(config_|)bisect$/ && $prvalue !~ /^build$/ && + $prvalue !~ /^make_warnings_file$/ && $buildonly) { # Note if a test is something other than build, then we -- GitLab From 1e9f5619d90866a6285139fc19291a93289a40cd Mon Sep 17 00:00:00 2001 From: linke li Date: Sat, 2 Mar 2024 12:42:21 +0800 Subject: [PATCH 1650/2290] ring-buffer: use READ_ONCE() to read cpu_buffer->commit_page in concurrent environment [ Upstream commit f1e30cb6369251c03f63c564006f96a54197dcc4 ] In function ring_buffer_iter_empty(), cpu_buffer->commit_page is read while other threads may change it. It may cause the time_stamp that read in the next line come from a different page. Use READ_ONCE() to avoid having to reason about compiler optimizations now and in future. Link: https://lore.kernel.org/linux-trace-kernel/tencent_DFF7D3561A0686B5E8FC079150A02505180A@qq.com Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Signed-off-by: linke li Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 431a922e5c89e..d2947de3021a9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4431,7 +4431,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter) cpu_buffer = iter->cpu_buffer; reader = cpu_buffer->reader_page; head_page = cpu_buffer->head_page; - commit_page = cpu_buffer->commit_page; + commit_page = READ_ONCE(cpu_buffer->commit_page); commit_ts = commit_page->page->time_stamp; /* -- GitLab From 63eaa43d5d5fd00777d5b849ada385a7047ae589 Mon Sep 17 00:00:00 2001 From: Petre Rodan Date: Mon, 8 Jan 2024 12:32:20 +0200 Subject: [PATCH 1651/2290] tools: iio: replace seekdir() in iio_generic_buffer [ Upstream commit 4e6500bfa053dc133021f9c144261b77b0ba7dc8 ] Replace seekdir() with rewinddir() in order to fix a localized glibc bug. One of the glibc patches that stable Gentoo is using causes an improper directory stream positioning bug on 32bit arm. That in turn ends up as a floating point exception in iio_generic_buffer. The attached patch provides a fix by using an equivalent function which should not cause trouble for other distros and is easier to reason about in general as it obviously always goes back to to the start. https://sourceware.org/bugzilla/show_bug.cgi?id=31212 Signed-off-by: Petre Rodan Link: https://lore.kernel.org/r/20240108103224.3986-1-petre.rodan@subdimension.ro Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- tools/iio/iio_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index 6a00a6eecaef0..c5c5082cb24e5 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -376,7 +376,7 @@ int build_channel_array(const char *device_dir, int buffer_idx, goto error_close_dir; } - seekdir(dp, 0); + rewinddir(dp); while (ent = readdir(dp), ent) { if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), "_en") == 0) { -- GitLab From 7dbf082988d63b4d09c0fc90f616556eafa5448d Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 12 Jan 2024 11:08:00 -0700 Subject: [PATCH 1652/2290] bus: mhi: host: Add MHI_PM_SYS_ERR_FAIL state [ Upstream commit bce3f770684cc1d91ff9edab431b71ac991faf29 ] When processing a SYSERR, if the device does not respond to the MHI_RESET from the host, the host will be stuck in a difficult to recover state. The host will remain in MHI_PM_SYS_ERR_PROCESS and not clean up the host channels. Clients will not be notified of the SYSERR via the destruction of their channel devices, which means clients may think that the device is still up. Subsequent SYSERR events such as a device fatal error will not be processed as the state machine cannot transition from PROCESS back to DETECT. The only way to recover from this is to unload the mhi module (wipe the state machine state) or for the mhi controller to initiate SHUTDOWN. This issue was discovered by stress testing soc_reset events on AIC100 via the sysfs node. soc_reset is processed entirely in hardware. When the register write hits the endpoint hardware, it causes the soc to reset without firmware involvement. In stress testing, there is a rare race where soc_reset N will cause the soc to reset and PBL to signal SYSERR (fatal error). If soc_reset N+1 is triggered before PBL can process the MHI_RESET from the host, then the soc will reset again, and re-run PBL from the beginning. This will cause PBL to lose all state. PBL will be waiting for the host to respond to the new syserr, but host will be stuck expecting the previous MHI_RESET to be processed. Additionally, the AMSS EE firmware (QSM) was hacked to synthetically reproduce the issue by simulating a FW hang after the QSM issued a SYSERR. In this case, soc_reset would not recover the device. For this failure case, to recover the device, we need a state similar to PROCESS, but can transition to DETECT. There is not a viable existing state to use. POR has the needed transitions, but assumes the device is in a good state and could allow the host to attempt to use the device. Allowing PROCESS to transition to DETECT invites the possibility of parallel SYSERR processing which could get the host and device out of sync. Thus, invent a new state - MHI_PM_SYS_ERR_FAIL This essentially a holding state. It allows us to clean up the host elements that are based on the old state of the device (channels), but does not allow us to directly advance back to an operational state. It does allow the detection and processing of another SYSERR which may recover the device, or allows the controller to do a clean shutdown. Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240112180800.536733-1-quic_jhugo@quicinc.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- drivers/bus/mhi/host/init.c | 1 + drivers/bus/mhi/host/internal.h | 9 ++++++--- drivers/bus/mhi/host/pm.c | 20 +++++++++++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c index 04fbccff65ac2..60c1df048fa20 100644 --- a/drivers/bus/mhi/host/init.c +++ b/drivers/bus/mhi/host/init.c @@ -62,6 +62,7 @@ static const char * const mhi_pm_state_str[] = { [MHI_PM_STATE_FW_DL_ERR] = "Firmware Download Error", [MHI_PM_STATE_SYS_ERR_DETECT] = "SYS ERROR Detect", [MHI_PM_STATE_SYS_ERR_PROCESS] = "SYS ERROR Process", + [MHI_PM_STATE_SYS_ERR_FAIL] = "SYS ERROR Failure", [MHI_PM_STATE_SHUTDOWN_PROCESS] = "SHUTDOWN Process", [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect", }; diff --git a/drivers/bus/mhi/host/internal.h b/drivers/bus/mhi/host/internal.h index 01fd10a399b61..6abf09da4f618 100644 --- a/drivers/bus/mhi/host/internal.h +++ b/drivers/bus/mhi/host/internal.h @@ -88,6 +88,7 @@ enum mhi_pm_state { MHI_PM_STATE_FW_DL_ERR, MHI_PM_STATE_SYS_ERR_DETECT, MHI_PM_STATE_SYS_ERR_PROCESS, + MHI_PM_STATE_SYS_ERR_FAIL, MHI_PM_STATE_SHUTDOWN_PROCESS, MHI_PM_STATE_LD_ERR_FATAL_DETECT, MHI_PM_STATE_MAX @@ -104,14 +105,16 @@ enum mhi_pm_state { #define MHI_PM_FW_DL_ERR BIT(7) #define MHI_PM_SYS_ERR_DETECT BIT(8) #define MHI_PM_SYS_ERR_PROCESS BIT(9) -#define MHI_PM_SHUTDOWN_PROCESS BIT(10) +#define MHI_PM_SYS_ERR_FAIL BIT(10) +#define MHI_PM_SHUTDOWN_PROCESS BIT(11) /* link not accessible */ -#define MHI_PM_LD_ERR_FATAL_DETECT BIT(11) +#define MHI_PM_LD_ERR_FATAL_DETECT BIT(12) #define MHI_REG_ACCESS_VALID(pm_state) ((pm_state & (MHI_PM_POR | MHI_PM_M0 | \ MHI_PM_M2 | MHI_PM_M3_ENTER | MHI_PM_M3_EXIT | \ MHI_PM_SYS_ERR_DETECT | MHI_PM_SYS_ERR_PROCESS | \ - MHI_PM_SHUTDOWN_PROCESS | MHI_PM_FW_DL_ERR))) + MHI_PM_SYS_ERR_FAIL | MHI_PM_SHUTDOWN_PROCESS | \ + MHI_PM_FW_DL_ERR))) #define MHI_PM_IN_ERROR_STATE(pm_state) (pm_state >= MHI_PM_FW_DL_ERR) #define MHI_PM_IN_FATAL_STATE(pm_state) (pm_state == MHI_PM_LD_ERR_FATAL_DETECT) #define MHI_DB_ACCESS_VALID(mhi_cntrl) (mhi_cntrl->pm_state & mhi_cntrl->db_access) diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c index 8a4362d75fc43..27f8a40f288cf 100644 --- a/drivers/bus/mhi/host/pm.c +++ b/drivers/bus/mhi/host/pm.c @@ -36,7 +36,10 @@ * M0 <--> M0 * M0 -> FW_DL_ERR * M0 -> M3_ENTER -> M3 -> M3_EXIT --> M0 - * L1: SYS_ERR_DETECT -> SYS_ERR_PROCESS --> POR + * L1: SYS_ERR_DETECT -> SYS_ERR_PROCESS + * SYS_ERR_PROCESS -> SYS_ERR_FAIL + * SYS_ERR_FAIL -> SYS_ERR_DETECT + * SYS_ERR_PROCESS --> POR * L2: SHUTDOWN_PROCESS -> LD_ERR_FATAL_DETECT * SHUTDOWN_PROCESS -> DISABLE * L3: LD_ERR_FATAL_DETECT <--> LD_ERR_FATAL_DETECT @@ -93,7 +96,12 @@ static const struct mhi_pm_transitions dev_state_transitions[] = { }, { MHI_PM_SYS_ERR_PROCESS, - MHI_PM_POR | MHI_PM_SHUTDOWN_PROCESS | + MHI_PM_POR | MHI_PM_SYS_ERR_FAIL | MHI_PM_SHUTDOWN_PROCESS | + MHI_PM_LD_ERR_FATAL_DETECT + }, + { + MHI_PM_SYS_ERR_FAIL, + MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS | MHI_PM_LD_ERR_FATAL_DETECT }, /* L2 States */ @@ -624,7 +632,13 @@ static void mhi_pm_sys_error_transition(struct mhi_controller *mhi_cntrl) !in_reset, timeout); if (!ret || in_reset) { dev_err(dev, "Device failed to exit MHI Reset state\n"); - goto exit_sys_error_transition; + write_lock_irq(&mhi_cntrl->pm_lock); + cur_state = mhi_tryset_pm_state(mhi_cntrl, + MHI_PM_SYS_ERR_FAIL); + write_unlock_irq(&mhi_cntrl->pm_lock); + /* Shutdown may have occurred, otherwise cleanup now */ + if (cur_state != MHI_PM_SYS_ERR_FAIL) + goto exit_sys_error_transition; } /* -- GitLab From 893b2562613f8e0b8141e15aa2f3791ebdcbd1e8 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 14 Feb 2024 00:37:55 +0100 Subject: [PATCH 1653/2290] usb: gadget: uvc: mark incomplete frames with UVC_STREAM_ERR [ Upstream commit 2a3b7af120477d0571b815ccb8600cafd5ebf02f ] If an frame was transmitted incomplete to the host, we set the UVC_STREAM_ERR bit in the header for the last request that is going to be queued. This way the host will know that it should drop the frame instead of trying to display the corrupted content. Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20240214-uvc-error-tag-v1-2-37659a3877fe@pengutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/uvc_video.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index e81865978299c..be48d5ab17c7b 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -35,6 +35,9 @@ uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf, data[1] = UVC_STREAM_EOH | video->fid; + if (video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) + data[1] |= UVC_STREAM_ERR; + if (video->queue.buf_used == 0 && ts.tv_sec) { /* dwClockFrequency is 48 MHz */ u32 pts = ((u64)ts.tv_sec * USEC_PER_SEC + ts.tv_nsec / NSEC_PER_USEC) * 48; -- GitLab From f692c547e3f2d5644499f9aca9dd4696306e72c0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 26 Jan 2024 15:55:55 +0200 Subject: [PATCH 1654/2290] thunderbolt: Keep the domain powered when USB4 port is in redrive mode [ Upstream commit a75e0684efe567ae5f6a8e91a8360c4c1773cf3a ] If a DiplayPort cable is directly connected to the host routers USB4 port, there is no tunnel involved but the port is in "redrive" mode meaning that it is re-driving the DisplayPort signals from its DisplayPort source. In this case we need to keep the domain powered on otherwise once the domain enters D3cold the connected monitor blanks too. Since this happens only on Intel Barlow Ridge add a quirk that takes runtime PM reference if we detect that the USB4 port entered redrive mode (and release it once it exits the mode). Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 14 +++++++++++ drivers/thunderbolt/tb.c | 49 +++++++++++++++++++++++++++++++++++- drivers/thunderbolt/tb.h | 4 +++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 4ab3803e10c83..638cb5fb22c11 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -42,6 +42,12 @@ static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) } } +static void quirk_block_rpm_in_redrive(struct tb_switch *sw) +{ + sw->quirks |= QUIRK_KEEP_POWER_IN_DP_REDRIVE; + tb_sw_dbg(sw, "preventing runtime PM in DP redrive mode\n"); +} + struct tb_quirk { u16 hw_vendor_id; u16 hw_device_id; @@ -85,6 +91,14 @@ static const struct tb_quirk tb_quirks[] = { quirk_usb3_maximum_bandwidth }, { 0x8087, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HUB_40G_BRIDGE, 0x0000, 0x0000, quirk_usb3_maximum_bandwidth }, + /* + * Block Runtime PM in DP redrive mode for Intel Barlow Ridge host + * controllers. + */ + { 0x8087, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI, 0x0000, 0x0000, + quirk_block_rpm_in_redrive }, + { 0x8087, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI, 0x0000, 0x0000, + quirk_block_rpm_in_redrive }, /* * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. */ diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index e1eb092ad1d67..e83269dc2b067 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1050,6 +1050,49 @@ err_rpm_put: pm_runtime_put_autosuspend(&in->sw->dev); } +static void tb_enter_redrive(struct tb_port *port) +{ + struct tb_switch *sw = port->sw; + + if (!(sw->quirks & QUIRK_KEEP_POWER_IN_DP_REDRIVE)) + return; + + /* + * If we get hot-unplug for the DP IN port of the host router + * and the DP resource is not available anymore it means there + * is a monitor connected directly to the Type-C port and we are + * in "redrive" mode. For this to work we cannot enter RTD3 so + * we bump up the runtime PM reference count here. + */ + if (!tb_port_is_dpin(port)) + return; + if (tb_route(sw)) + return; + if (!tb_switch_query_dp_resource(sw, port)) { + port->redrive = true; + pm_runtime_get(&sw->dev); + tb_port_dbg(port, "enter redrive mode, keeping powered\n"); + } +} + +static void tb_exit_redrive(struct tb_port *port) +{ + struct tb_switch *sw = port->sw; + + if (!(sw->quirks & QUIRK_KEEP_POWER_IN_DP_REDRIVE)) + return; + + if (!tb_port_is_dpin(port)) + return; + if (tb_route(sw)) + return; + if (port->redrive && tb_switch_query_dp_resource(sw, port)) { + port->redrive = false; + pm_runtime_put(&sw->dev); + tb_port_dbg(port, "exit redrive mode\n"); + } +} + static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port) { struct tb_port *in, *out; @@ -1066,7 +1109,10 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port) } tunnel = tb_find_tunnel(tb, TB_TUNNEL_DP, in, out); - tb_deactivate_and_free_tunnel(tunnel); + if (tunnel) + tb_deactivate_and_free_tunnel(tunnel); + else + tb_enter_redrive(port); list_del_init(&port->list); /* @@ -1092,6 +1138,7 @@ static void tb_dp_resource_available(struct tb *tb, struct tb_port *port) tb_port_dbg(port, "DP %s resource available\n", tb_port_is_dpin(port) ? "IN" : "OUT"); list_add_tail(&port->list, &tcm->dp_resources); + tb_exit_redrive(port); /* Look for suitable DP IN <-> DP OUT pairs now */ tb_tunnel_dp(tb); diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index f79cae48a8eab..b3fec5f8e20cd 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -27,6 +27,8 @@ #define QUIRK_FORCE_POWER_LINK_CONTROLLER BIT(0) /* Disable CLx if not supported */ #define QUIRK_NO_CLX BIT(1) +/* Need to keep power on while USB4 port is in redrive mode */ +#define QUIRK_KEEP_POWER_IN_DP_REDRIVE BIT(2) /** * struct tb_nvm - Structure holding NVM information @@ -254,6 +256,7 @@ struct tb_switch { * DMA paths through this port. * @max_bw: Maximum possible bandwidth through this adapter if set to * non-zero. + * @redrive: For DP IN, if true the adapter is in redrive mode. * * In USB4 terminology this structure represents an adapter (protocol or * lane adapter). @@ -280,6 +283,7 @@ struct tb_port { unsigned int ctl_credits; unsigned int dma_credits; unsigned int max_bw; + bool redrive; }; /** -- GitLab From 57e6634e591b6e51cbb59957d24630af18b3869f Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Thu, 22 Feb 2024 22:09:01 +0100 Subject: [PATCH 1655/2290] usb: typec: tcpci: add generic tcpci fallback compatible [ Upstream commit 8774ea7a553e2aec323170d49365b59af0a2b7e0 ] The driver already support the tcpci binding for the i2c_device_id so add the support for the of_device_id too. Signed-off-by: Marco Felsch Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240222210903.208901-3-m.felsch@pengutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/tcpm/tcpci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 816945913ed0d..f649769912e53 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -875,6 +875,7 @@ MODULE_DEVICE_TABLE(i2c, tcpci_id); #ifdef CONFIG_OF static const struct of_device_id tcpci_of_match[] = { { .compatible = "nxp,ptn5110", }, + { .compatible = "tcpci", }, {}, }; MODULE_DEVICE_TABLE(of, tcpci_of_match); -- GitLab From 409289d0a15166671dfa3f452ee87a00db8f08db Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Mar 2024 11:13:51 +0000 Subject: [PATCH 1656/2290] usb: sl811-hcd: only defined function checkdone if QUIRK2 is defined [ Upstream commit 12f371e2b6cb4b79c788f1f073992e115f4ca918 ] Function checkdone is only required if QUIRK2 is defined, so add appropriate #if / #endif around the function. Cleans up clang scan build warning: drivers/usb/host/sl811-hcd.c:588:18: warning: unused function 'checkdone' [-Wunused-function] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240307111351.1982382-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/sl811-hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index b8b90eec91078..48478eb712119 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -585,6 +585,7 @@ done(struct sl811 *sl811, struct sl811h_ep *ep, u8 bank) finish_request(sl811, ep, urb, urbstat); } +#ifdef QUIRK2 static inline u8 checkdone(struct sl811 *sl811) { u8 ctl; @@ -616,6 +617,7 @@ static inline u8 checkdone(struct sl811 *sl811) #endif return irqstat; } +#endif static irqreturn_t sl811h_irq(struct usb_hcd *hcd) { -- GitLab From 103c0f946f9a173b40ec94788480fb667c153304 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 25 Jan 2024 13:11:16 +0100 Subject: [PATCH 1657/2290] thermal/of: Assume polling-delay(-passive) 0 when absent [ Upstream commit 488164006a281986d95abbc4b26e340c19c4c85b ] Currently, thermal zones associated with providers that have interrupts for signaling hot/critical trips are required to set a polling-delay of 0 to indicate no polling. This feels a bit backwards. Change the code such that "no polling delay" also means "no polling". Suggested-by: Bjorn Andersson Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240125-topic-thermal-v1-2-3c9d4dced138@linaro.org Signed-off-by: Sasha Levin --- drivers/thermal/thermal_of.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 4104743dbc17e..202dce0d2e309 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -337,14 +337,18 @@ static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdel int ret; ret = of_property_read_u32(np, "polling-delay-passive", pdelay); - if (ret < 0) { - pr_err("%pOFn: missing polling-delay-passive property\n", np); + if (ret == -EINVAL) { + *pdelay = 0; + } else if (ret < 0) { + pr_err("%pOFn: Couldn't get polling-delay-passive: %d\n", np, ret); return ret; } ret = of_property_read_u32(np, "polling-delay", delay); - if (ret < 0) { - pr_err("%pOFn: missing polling-delay property\n", np); + if (ret == -EINVAL) { + *delay = 0; + } else if (ret < 0) { + pr_err("%pOFn: Couldn't get polling-delay: %d\n", np, ret); return ret; } -- GitLab From 8b849265dad8fcc5e1b5945a0aca088ff2791852 Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Tue, 5 Mar 2024 15:56:06 +0900 Subject: [PATCH 1658/2290] ASoC: soc-core.c: Skip dummy codec when adding platforms [ Upstream commit 23fb6bc2696119391ec3a92ccaffe50e567c515e ] When pcm_runtime is adding platform components it will scan all registered components. In case of DPCM FE/BE some DAI links will configure dummy platform. However both dummy codec and dummy platform are using "snd-soc-dummy" as component->name. Dummy codec should be skipped when adding platforms otherwise there'll be overflow and UBSAN complains. Reported-by: Zhipeng Wang Signed-off-by: Chancel Liu Link: https://msgid.link/r/20240305065606.3778642-1-chancel.liu@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a409fbed8f34c..6a4101dc15a54 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1020,6 +1020,9 @@ int snd_soc_add_pcm_runtime(struct snd_soc_card *card, if (!snd_soc_is_matching_component(platform, component)) continue; + if (snd_soc_component_is_dummy(component) && component->num_dai) + continue; + snd_soc_rtd_add_component(rtd, component); } } -- GitLab From f8a7b7b085df99dc870df525d6806f035cc91ad1 Mon Sep 17 00:00:00 2001 From: Aleksandr Burakov Date: Fri, 1 Mar 2024 14:35:43 +0300 Subject: [PATCH 1659/2290] fbdev: viafb: fix typo in hw_bitblt_1 and hw_bitblt_2 [ Upstream commit bc87bb342f106a0402186bcb588fcbe945dced4b ] There are some actions with value 'tmp' but 'dst_addr' is checked instead. It is obvious that a copy-paste error was made here and the value of variable 'tmp' should be checked here. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Aleksandr Burakov Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/via/accel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/via/accel.c b/drivers/video/fbdev/via/accel.c index 0a1bc7a4d7853..1e04026f08091 100644 --- a/drivers/video/fbdev/via/accel.c +++ b/drivers/video/fbdev/via/accel.c @@ -115,7 +115,7 @@ static int hw_bitblt_1(void __iomem *engine, u8 op, u32 width, u32 height, if (op != VIA_BITBLT_FILL) { tmp = src_mem ? 0 : src_addr; - if (dst_addr & 0xE0000007) { + if (tmp & 0xE0000007) { printk(KERN_WARNING "hw_bitblt_1: Unsupported source " "address %X\n", tmp); return -EINVAL; @@ -260,7 +260,7 @@ static int hw_bitblt_2(void __iomem *engine, u8 op, u32 width, u32 height, writel(tmp, engine + 0x18); tmp = src_mem ? 0 : src_addr; - if (dst_addr & 0xE0000007) { + if (tmp & 0xE0000007) { printk(KERN_WARNING "hw_bitblt_2: Unsupported source " "address %X\n", tmp); return -EINVAL; -- GitLab From 5245a6da27ef79f8dba98dad5542ebe56d311837 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Mar 2024 09:51:40 -0600 Subject: [PATCH 1660/2290] io_uring: clear opcode specific data for an early failure [ Upstream commit e21e1c45e1fe2e31732f40256b49c04e76a17cee ] If failure happens before the opcode prep handler is called, ensure that we clear the opcode specific area of the request, which holds data specific to that request type. This prevents errors where opcode handlers either don't get to clear per-request private data since prep isn't even called. Reported-and-tested-by: syzbot+f8e9a371388aa62ecab4@syzkaller.appspotmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 415248c1f82c6..68f1b6f8699a6 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1978,6 +1978,13 @@ static void io_init_req_drain(struct io_kiocb *req) } } +static __cold int io_init_fail_req(struct io_kiocb *req, int err) +{ + /* ensure per-opcode data is cleared if we fail before prep */ + memset(&req->cmd.data, 0, sizeof(req->cmd.data)); + return err; +} + static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_uring_sqe *sqe) __must_hold(&ctx->uring_lock) @@ -1998,29 +2005,29 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; - return -EINVAL; + return io_init_fail_req(req, -EINVAL); } def = &io_op_defs[opcode]; if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { /* enforce forwards compatibility on users */ if (sqe_flags & ~SQE_VALID_FLAGS) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (sqe_flags & IOSQE_BUFFER_SELECT) { if (!def->buffer_select) - return -EOPNOTSUPP; + return io_init_fail_req(req, -EOPNOTSUPP); req->buf_index = READ_ONCE(sqe->buf_group); } if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) ctx->drain_disabled = true; if (sqe_flags & IOSQE_IO_DRAIN) { if (ctx->drain_disabled) - return -EOPNOTSUPP; + return io_init_fail_req(req, -EOPNOTSUPP); io_init_req_drain(req); } } if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) - return -EACCES; + return io_init_fail_req(req, -EACCES); /* knock it to the slow queue path, will be drained there */ if (ctx->drain_active) req->flags |= REQ_F_FORCE_ASYNC; @@ -2033,9 +2040,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } if (!def->ioprio && sqe->ioprio) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (def->needs_file) { struct io_submit_state *state = &ctx->submit_state; @@ -2059,12 +2066,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->creds = xa_load(&ctx->personalities, personality); if (!req->creds) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); get_cred(req->creds); ret = security_uring_override_creds(req->creds); if (ret) { put_cred(req->creds); - return ret; + return io_init_fail_req(req, ret); } req->flags |= REQ_F_CREDS; } -- GitLab From 30044c66bc1a8521bc96944a42bf5ff4e517b246 Mon Sep 17 00:00:00 2001 From: "Jiawei Fu (iBug)" Date: Sat, 16 Mar 2024 03:27:49 +0800 Subject: [PATCH 1661/2290] drivers/nvme: Add quirks for device 126f:2262 [ Upstream commit e89086c43f0500bc7c4ce225495b73b8ce234c1f ] This commit adds NVME_QUIRK_NO_DEEPEST_PS and NVME_QUIRK_BOGUS_NID for device [126f:2262], which appears to be a generic VID:PID pair used for many SSDs based on the Silicon Motion SM2262/SM2262EN controller. Two of my SSDs with this VID:PID pair exhibit the same behavior: * They frequently have trouble exiting the deepest power state (5), resulting in the entire disk unresponsive. Verified by setting nvme_core.default_ps_max_latency_us=10000 and observing them behaving normally. * They produce all-zero nguid and eui64 with `nvme id-ns` command. The offending products are: * HP SSD EX950 1TB * HIKVISION C2000Pro 2TB Signed-off-by: Jiawei Fu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3d01290994d89..5ff09f2cacab7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3471,6 +3471,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_BOGUS_NID, }, -- GitLab From 664206ff8b019bcd1e55b10b2eea3add8761b971 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Tue, 19 Mar 2024 11:13:44 +0300 Subject: [PATCH 1662/2290] fbmon: prevent division by zero in fb_videomode_from_videomode() [ Upstream commit c2d953276b8b27459baed1277a4fdd5dd9bd4126 ] The expression htotal * vtotal can have a zero value on overflow. It is necessary to prevent division by zero like in fb_var_to_videomode(). Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbmon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index b0e690f41025a..9ca99da3a56a0 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -1311,7 +1311,7 @@ int fb_get_mode(int flags, u32 val, struct fb_var_screeninfo *var, struct fb_inf int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode) { - unsigned int htotal, vtotal; + unsigned int htotal, vtotal, total; fbmode->xres = vm->hactive; fbmode->left_margin = vm->hback_porch; @@ -1344,8 +1344,9 @@ int fb_videomode_from_videomode(const struct videomode *vm, vtotal = vm->vactive + vm->vfront_porch + vm->vback_porch + vm->vsync_len; /* prevent division by zero */ - if (htotal && vtotal) { - fbmode->refresh = vm->pixelclock / (htotal * vtotal); + total = htotal * vtotal; + if (total) { + fbmode->refresh = vm->pixelclock / total; /* a mode must have htotal and vtotal != 0 or it is invalid */ } else { fbmode->refresh = 0; -- GitLab From fcd1993a2937604d43ce134b9b9022cb76136610 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 8 Apr 2024 23:18:32 +0200 Subject: [PATCH 1663/2290] netfilter: nf_tables: release batch on table validation from abort path commit a45e6889575c2067d3c0212b6bc1022891e65b91 upstream. Unlike early commit path stage which triggers a call to abort, an explicit release of the batch is required on abort, otherwise mutex is released and commit_list remains in place. Add WARN_ON_ONCE to ensure commit_list is empty from the abort path before releasing the mutex. After this patch, commit_list is always assumed to be empty before grabbing the mutex, therefore 03c1f1ef1584 ("netfilter: Cleanup nft_net->module_list from nf_tables_exit_net()") only needs to release the pending modules for registration. Cc: stable@vger.kernel.org Fixes: c0391b6ab810 ("netfilter: nf_tables: missing validation from the abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8d38cd5047692..6b032a90e2b15 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9902,10 +9902,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -10081,7 +10082,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) else nf_tables_module_autoload_cleanup(net); - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -10095,6 +10096,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -10892,9 +10895,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); -- GitLab From 8d3a58af50e46167b6f1db47adadad03c0045dae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 8 Apr 2024 23:18:33 +0200 Subject: [PATCH 1664/2290] netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path commit 0d459e2ffb541841714839e8228b845458ed3b27 upstream. The commit mutex should not be released during the critical section between nft_gc_seq_begin() and nft_gc_seq_end(), otherwise, async GC worker could collect expired objects and get the released commit lock within the same GC sequence. nf_tables_module_autoload() temporarily releases the mutex to load module dependencies, then it goes back to replay the transaction again. Move it at the end of the abort phase after nft_gc_seq_end() is called. Cc: stable@vger.kernel.org Fixes: 720344340fb9 ("netfilter: nf_tables: GC transaction race with abort path") Reported-by: Kuan-Ting Chen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6b032a90e2b15..e7b31c2c92df2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10077,11 +10077,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - return err; } @@ -10098,6 +10093,14 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; -- GitLab From 2aeb805a1bcd5f27c8c0d1a9d4d653f16d1506f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 8 Apr 2024 23:18:34 +0200 Subject: [PATCH 1665/2290] netfilter: nf_tables: discard table flag update with pending basechain deletion commit 1bc83a019bbe268be3526406245ec28c2458a518 upstream. Hook unregistration is deferred to the commit phase, same occurs with hook updates triggered by the table dormant flag. When both commands are combined, this results in deleting a basechain while leaving its hook still registered in the core. Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e7b31c2c92df2..8152a69d82681 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1192,6 +1192,24 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ __NFT_TABLE_F_WAS_AWAKEN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->ctx.table == ctx->table && + trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -1215,7 +1233,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return -EOPNOTSUPP; /* No dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, -- GitLab From 2b85977977cbd120591b23c2450e90a5806a7167 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 31 Jul 2023 15:59:42 -0300 Subject: [PATCH 1666/2290] tty: n_gsm: require CAP_NET_ADMIN to attach N_GSM0710 ldisc commit 67c37756898a5a6b2941a13ae7260c89b54e0d88 upstream. Any unprivileged user can attach N_GSM0710 ldisc, but it requires CAP_NET_ADMIN to create a GSM network anyway. Require initial namespace CAP_NET_ADMIN to do that. Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20230731185942.279611-1-cascardo@canonical.com Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 6b6abce6b69f4..d2daf0a72e347 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2969,6 +2969,9 @@ static int gsmld_open(struct tty_struct *tty) { struct gsm_mux *gsm; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (tty->ops->write == NULL) return -EINVAL; -- GitLab From cf0650adb62574b1d5ada471c34aff086e554db3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 28 Mar 2024 07:42:57 +0100 Subject: [PATCH 1667/2290] gcc-plugins/stackleak: Avoid .head.text section commit e7d24c0aa8e678f41457d1304e2091cac6fd1a2e upstream. The .head.text section carries the startup code that runs with the MMU off or with a translation of memory that deviates from the ordinary one. So avoid instrumentation with the stackleak plugin, which already avoids .init.text and .noinstr.text entirely. Fixes: 48204aba801f1b51 ("x86/sme: Move early SME kernel encryption handling into .head.text") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202403221630.2692c998-oliver.sang@intel.com Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240328064256.2358634-2-ardb+git@google.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/stackleak_plugin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index c5c2ce113c923..d20c47d21ad83 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -467,6 +467,8 @@ static bool stackleak_gate(void) return false; if (STRING_EQUAL(section, ".entry.text")) return false; + if (STRING_EQUAL(section, ".head.text")) + return false; } return track_frame_size >= 0; -- GitLab From f1465ff4c83c0544fd2c6333523301f3484184a7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Apr 2024 09:24:48 +0200 Subject: [PATCH 1668/2290] Revert "scsi: sd: usb_storage: uas: Access media prior to querying device properties" This reverts commit b73dd5f9997279715cd450ee8ca599aaff2eabb9 which is commit 321da3dc1f3c92a12e3c5da934090d2992a8814c upstream. It is known to cause problems and has asked to be dropped. Link: https://lore.kernel.org/r/yq1frvvpymp.fsf@ca-mkp.ca.oracle.com Cc: Tasos Sahanidis Cc: Ewan D. Milne Cc: Bart Van Assche Cc: Tasos Sahanidis Cc: Martin K. Petersen Cc: James Bottomley Cc: Sasha Levin Reported-by: John David Anglin Reported-by: Cyril Brulebois Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 26 +------------------------- drivers/usb/storage/scsiglue.c | 7 ------- drivers/usb/storage/uas.c | 7 ------- include/scsi/scsi_device.h | 1 - 4 files changed, 1 insertion(+), 40 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f32236c3f81c6..ad619f7c74187 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3286,24 +3286,6 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } -static void sd_read_block_zero(struct scsi_disk *sdkp) -{ - unsigned int buf_len = sdkp->device->sector_size; - char *buffer, cmd[10] = { }; - - buffer = kmalloc(buf_len, GFP_KERNEL); - if (!buffer) - return; - - cmd[0] = READ_10; - put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ - put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ - - scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, - SD_TIMEOUT, sdkp->max_retries, NULL); - kfree(buffer); -} - /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3343,13 +3325,7 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - /* - * Some USB/UAS devices return generic values for mode pages - * until the media has been accessed. Trigger a READ operation - * to force the device to populate mode pages. - */ - if (sdp->read_before_ms) - sd_read_block_zero(sdkp); + /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 12cf9940e5b67..c54e9805da536 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,13 +179,6 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; - /* - * Some devices report generic values until the media has been - * accessed. Force a READ(10) prior to querying device - * characteristics. - */ - sdev->read_before_ms = 1; - /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index af619efe8eabf..ee5621bdb11e2 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -876,13 +876,6 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; - /* - * Some devices report generic values until the media has been - * accessed. Force a READ(10) prior to querying device - * characteristics. - */ - sdev->read_before_ms = 1; - /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index a64713fe52640..b407807cc6695 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,7 +204,6 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ - unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- GitLab From 345b6b831980964b607db53cfd681abd2234a1b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Apr 2024 09:26:49 +0200 Subject: [PATCH 1669/2290] Revert "scsi: core: Add struct for args to execution functions" This reverts commit cf33e6ca12d814e1be2263cb76960d0019d7fb94 which is commit d0949565811f0896c1c7e781ab2ad99d34273fdf upstream. It is known to cause problems and has asked to be dropped. Link: https://lore.kernel.org/r/yq1frvvpymp.fsf@ca-mkp.ca.oracle.com Cc: Tasos Sahanidis Cc: Ewan D. Milne Cc: Bart Van Assche Cc: Tasos Sahanidis Cc: Martin K. Petersen Cc: James Bottomley Cc: Sasha Levin Reported-by: John David Anglin Reported-by: Cyril Brulebois Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 52 ++++++++++++++++++++------------------ include/scsi/scsi_device.h | 51 ++++++++++--------------------------- 2 files changed, 41 insertions(+), 62 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index edd296f950a33..5c5954b78585e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,37 +185,39 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } + /** - * scsi_execute_cmd - insert request and wait for the result - * @sdev: scsi_device + * __scsi_execute - insert request and wait for the result + * @sdev: scsi device * @cmd: scsi command - * @opf: block layer request cmd_flags + * @data_direction: data direction * @buffer: data buffer * @bufflen: len of buffer + * @sense: optional sense buffer + * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @args: Optional args. See struct definition for field descriptions + * @flags: flags for ->cmd_flags + * @rq_flags: flags for ->rq_flags + * @resid: optional residual length * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, - blk_opf_t opf, void *buffer, unsigned int bufflen, - int timeout, int retries, - const struct scsi_exec_args *args) +int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + int data_direction, void *buffer, unsigned bufflen, + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, blk_opf_t flags, + req_flags_t rq_flags, int *resid) { - static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - if (!args) - args = &default_args; - else if (WARN_ON_ONCE(args->sense && - args->sense_len != SCSI_SENSE_BUFFERSIZE)) - return -EINVAL; - - req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); + req = scsi_alloc_request(sdev->request_queue, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, + rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -230,7 +232,8 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = retries; req->timeout = timeout; - req->rq_flags |= RQF_QUIET; + req->cmd_flags |= flags; + req->rq_flags |= rq_flags | RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -246,21 +249,20 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (args->resid) - *args->resid = scmd->resid_len; - if (args->sense) - memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (args->sshdr) + if (resid) + *resid = scmd->resid_len; + if (sense && scmd->sense_len) + memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - args->sshdr); - + sshdr); ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(scsi_execute_cmd); +EXPORT_SYMBOL(__scsi_execute); /* * Wake up the error handler if necessary. Avoid as follows that the error diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b407807cc6695..d2751ed536df2 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -479,51 +479,28 @@ extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); - -/* Optional arguments to scsi_execute_cmd */ -struct scsi_exec_args { - unsigned char *sense; /* sense buffer */ - unsigned int sense_len; /* sense buffer len */ - struct scsi_sense_hdr *sshdr; /* decoded sense header */ - blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ - int *resid; /* residual length */ -}; - -int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, - blk_opf_t opf, void *buffer, unsigned int bufflen, - int timeout, int retries, - const struct scsi_exec_args *args); - +extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + int data_direction, void *buffer, unsigned bufflen, + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, blk_opf_t flags, + req_flags_t rq_flags, int *resid); /* Make sure any sense buffer is the correct size. */ -#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ - _sshdr, _timeout, _retries, _flags, _rq_flags, \ - _resid) \ +#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ + sshdr, timeout, retries, flags, rq_flags, resid) \ ({ \ - scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ - REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ - _buffer, _bufflen, _timeout, _retries, \ - &(struct scsi_exec_args) { \ - .sense = _sense, \ - .sshdr = _sshdr, \ - .req_flags = _rq_flags & RQF_PM ? \ - BLK_MQ_REQ_PM : 0, \ - .resid = _resid, \ - }); \ + BUILD_BUG_ON((sense) != NULL && \ + sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ + __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ + sense, sshdr, timeout, retries, flags, rq_flags, \ + resid); \ }) - static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute_cmd(sdev, cmd, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, - bufflen, timeout, retries, - &(struct scsi_exec_args) { - .sshdr = sshdr, - .resid = resid, - }); + return scsi_execute(sdev, cmd, data_direction, buffer, + bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); -- GitLab From 3a9c459091e33f7947c6c7958963e0bd14e737e4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH 1670/2290] scsi: sd: usb_storage: uas: Access media prior to querying device properties [ Upstream commit 321da3dc1f3c92a12e3c5da934090d2992a8814c ] It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- drivers/usb/storage/scsiglue.c | 7 +++++++ drivers/usb/storage/uas.c | 7 +++++++ include/scsi/scsi_device.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ad619f7c74187..3ec9b324fdcf9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3286,6 +3286,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_req(sdkp->device, cmd, DMA_FROM_DEVICE, buffer, buf_len, + NULL, SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3325,7 +3343,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da536..12cf9940e5b67 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index ee5621bdb11e2..af619efe8eabf 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -876,6 +876,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d2751ed536df2..1504d3137cc69 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- GitLab From 649e5646362a2815ec9cc85120fc59a885282085 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 13 Feb 2024 14:54:25 +0100 Subject: [PATCH 1671/2290] virtio: reenable config if freezing device failed commit 310227f42882c52356b523e2f4e11690eebcd2ab upstream. Currently, we don't reenable the config if freezing the device failed. For example, virtio-mem currently doesn't support suspend+resume, and trying to freeze the device will always fail. Afterwards, the device will no longer respond to resize requests, because it won't get notified about config changes. Let's fix this by re-enabling the config if freezing fails. Fixes: 22b7050a024d ("virtio: defer config changed notifications") Cc: Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Xuan Zhuo Signed-off-by: David Hildenbrand Message-Id: <20240213135425.795001-1-david@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 828ced0607423..1ef094427f299 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -489,13 +489,19 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device); int virtio_device_freeze(struct virtio_device *dev) { struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + int ret; virtio_config_disable(dev); dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; - if (drv && drv->freeze) - return drv->freeze(dev); + if (drv && drv->freeze) { + ret = drv->freeze(dev); + if (ret) { + virtio_config_enable(dev); + return ret; + } + } return 0; } -- GitLab From e80b4980af2688d8ff69c157ffa773dd1f1eb02c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 9 Mar 2024 12:24:48 -0800 Subject: [PATCH 1672/2290] randomize_kstack: Improve entropy diffusion [ Upstream commit 9c573cd313433f6c1f7236fe64b9b743500c1628 ] The kstack_offset variable was really only ever using the low bits for kernel stack offset entropy. Add a ror32() to increase bit diffusion. Suggested-by: Arnd Bergmann Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Link: https://lore.kernel.org/r/20240309202445.work.165-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/linux/randomize_kstack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 5d868505a94e4..6d92b68efbf6c 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) -- GitLab From 00e34ff1ce811749c2dbf3ff5ad2322bce54ebcb Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 29 Mar 2024 07:32:06 -0700 Subject: [PATCH 1673/2290] platform/x86: intel-vbtn: Update tablet mode switch at end of probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 434e5781d8cd2d0ed512d920c6cdeba4b33a2e81 ] ACER Vivobook Flip (TP401NAS) virtual intel switch is implemented as follow: Device (VGBI) { Name (_HID, EisaId ("INT33D6") ... Name (VBDS, Zero) Method (_STA, 0, Serialized) // _STA: Status ... Method (VBDL, 0, Serialized) { PB1E |= 0x20 VBDS |= 0x40 } Method (VGBS, 0, Serialized) { Return (VBDS) /* \_SB_.PCI0.SBRG.EC0_.VGBI.VBDS */ } ... } By default VBDS is set to 0. At boot it is set to clamshell (bit 6 set) only after method VBDL is executed. Since VBDL is now evaluated in the probe routine later, after the device is registered, the retrieved value of VBDS was still 0 ("tablet mode") when setting up the virtual switch. Make sure to evaluate VGBS after VBDL, to ensure the convertible boots in clamshell mode, the expected default. Fixes: 26173179fae1 ("platform/x86: intel-vbtn: Eval VBDL after registering our notifier") Signed-off-by: Gwendal Grignou Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240329143206.2977734-3-gwendal@chromium.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/vbtn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index c10c99a31a90a..224139006a433 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device) priv->switches_dev->id.bustype = BUS_HOST; if (priv->has_switches) { - detect_tablet_mode(&device->dev); - ret = input_register_device(priv->switches_dev); if (ret) return ret; @@ -316,6 +314,9 @@ static int intel_vbtn_probe(struct platform_device *device) if (ACPI_FAILURE(status)) dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status); } + // Check switches after buttons since VBDL may have side effects. + if (has_switches) + detect_tablet_mode(&device->dev); device_init_wakeup(&device->dev, true); /* -- GitLab From 21f5cfed1e7a3254de46fee5f0ceeca6bda14b21 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 23 Feb 2024 12:36:23 -0500 Subject: [PATCH 1674/2290] Bluetooth: btintel: Fixe build regression commit 6e62ebfb49eb65bdcbfc5797db55e0ce7f79c3dd upstream. This fixes the following build regression: drivers-bluetooth-btintel.c-btintel_read_version()-warn: passing-zero-to-PTR_ERR Fixes: b79e04091010 ("Bluetooth: btintel: Fix null ptr deref in btintel_read_version") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btintel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index c77c06b84d86c..7a9d2da3c8146 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -405,13 +405,13 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver) struct sk_buff *skb; skb = __hci_cmd_sync(hdev, 0xfc05, 0, NULL, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { + if (IS_ERR(skb)) { bt_dev_err(hdev, "Reading Intel version information failed (%ld)", PTR_ERR(skb)); return PTR_ERR(skb); } - if (skb->len != sizeof(*ver)) { + if (!skb || skb->len != sizeof(*ver)) { bt_dev_err(hdev, "Intel version event size mismatch"); kfree_skb(skb); return -EILSEQ; -- GitLab From cea93dae3e253f03b39403b2e0fd15626feea4ce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Feb 2024 15:03:10 +0100 Subject: [PATCH 1675/2290] net: mpls: error out if inner headers are not set commit 025f8ad20f2e3264d11683aa9cbbf0083eefbdcd upstream. mpls_gso_segment() assumes skb_inner_network_header() returns a valid result: mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) goto out; if (unlikely(!pskb_may_pull(skb, mpls_hlen))) With syzbot reproducer, skb_inner_network_header() yields 0, skb_network_header() returns 108, so this will "pskb_may_pull(skb, -108)))" which triggers a newly added DEBUG_NET_WARN_ON_ONCE() check: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 pskb_may_pull_reason include/linux/skbuff.h:2723 [inline] WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 pskb_may_pull include/linux/skbuff.h:2739 [inline] WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 mpls_gso_segment+0x773/0xaa0 net/mpls/mpls_gso.c:34 [..] skb_mac_gso_segment+0x383/0x740 net/core/gso.c:53 nsh_gso_segment+0x40a/0xad0 net/nsh/nsh.c:108 skb_mac_gso_segment+0x383/0x740 net/core/gso.c:53 __skb_gso_segment+0x324/0x4c0 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] [..] sch_direct_xmit+0x11a/0x5f0 net/sched/sch_generic.c:327 [..] packet_sendmsg+0x46a9/0x6130 net/packet/af_packet.c:3113 [..] First iteration of this patch made mpls_hlen signed and changed test to error out to "mpls_hlen <= 0 || ..". Eric Dumazet said: > I was thinking about adding a debug check in skb_inner_network_header() > if inner_network_header is zero (that would mean it is not 'set' yet), > but this would trigger even after your patch. So add new skb_inner_network_header_was_set() helper and use that. The syzbot reproducer injects data via packet socket. The skb that gets allocated and passed down the stack has ->protocol set to NSH (0x894f) and gso_type set to SKB_GSO_UDP | SKB_GSO_DODGY. This gets passed to skb_mac_gso_segment(), which sees NSH as ptype to find a callback for. nsh_gso_segment() retrieves next type: proto = tun_p_to_eth_p(nsh_hdr(skb)->np); ... which is MPLS (TUN_P_MPLS_UC). It updates skb->protocol and then calls mpls_gso_segment(). Inner offsets are all 0, so mpls_gso_segment() ends up with a negative header size. In case more callers rely on silent handling of such large may_pull values we could also 'legalize' this behaviour, either replacing the debug check with (len > INT_MAX) test or removing it and instead adding a comment before existing if (unlikely(len > skb->len)) return SKB_DROP_REASON_PKT_TOO_SMALL; test in pskb_may_pull_reason(), saying that this check also implicitly takes care of callers that miscompute header sizes. Cc: Simon Horman Fixes: 219eee9c0d16 ("net: skbuff: add overflow debug check to pull/push helpers") Reported-by: syzbot+99d15fcdb0132a1e1a82@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/00000000000043b1310611e388aa@google.com/raw Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20240222140321.14080-1-fw@strlen.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 5 +++++ net/mpls/mpls_gso.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1326a935b6fad..d5f888fe0e331 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2802,6 +2802,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1482259de9b5d..40334d4d89dea 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -26,6 +26,9 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __be16 mpls_protocol; unsigned int mpls_hlen; + if (!skb_inner_network_header_was_set(skb)) + goto out; + skb_reset_network_header(skb); mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) -- GitLab From eea65ed73898e6e94ba44b21836f911d64ec59cb Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Mon, 19 Feb 2024 13:53:15 +0300 Subject: [PATCH 1676/2290] VMCI: Fix possible memcpy() run-time warning in vmci_datagram_invoke_guest_handler() commit e606e4b71798cc1df20e987dde2468e9527bd376 upstream. The changes are similar to those given in the commit 19b070fefd0d ("VMCI: Fix memcpy() run-time warning in dg_dispatch_as_host()"). Fix filling of the msg and msg_payload in dg_info struct, which prevents a possible "detected field-spanning write" of memcpy warning that is issued by the tracking mechanism __fortify_memcpy_chk. Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240219105315.76955-1-kovalev@altlinux.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_datagram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index d1d8224c8800c..a0ad1f3a69f7e 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -378,7 +378,8 @@ int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) dg_info->in_dg_host_queue = false; dg_info->entry = dst_entry; - memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); + dg_info->msg = *dg; + memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); -- GitLab From c5f9fe2c1e5023fa096189a8bfba6420aa035587 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Tue, 19 Mar 2024 15:24:03 +0800 Subject: [PATCH 1677/2290] Revert "drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03c6284df179de3a4a6e0684764b1c71d2a405e2 upstream. This patch causes the following iounmap erorr and calltrace iounmap: bad address 00000000d0b3631f The original patch was unjustified because amdgpu_device_fini_sw() will always cleanup the rmmio mapping. This reverts commit eb4f139888f636614dab3bcce97ff61cefc4b3a7. Signed-off-by: Ma Jun Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e4eb906806a51..b11690a816e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3713,10 +3713,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) { - r = -ENOMEM; - goto unmap_memory; - } + if (!adev->reset_domain) + return -ENOMEM; /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -3724,18 +3722,18 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - goto unmap_memory; + return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - goto unmap_memory; + return r; /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) - goto unmap_memory; + return r; /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -3745,7 +3743,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) - goto unmap_memory; + return r; } /* enable PCIE atomic ops */ @@ -4001,8 +3999,6 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); -unmap_memory: - iounmap(adev->rmmio); return r; } -- GitLab From cd5d98c0556cd790f78a3ba26afc9d2f896163e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Apr 2024 13:05:29 +0200 Subject: [PATCH 1678/2290] Linux 6.1.86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240411095412.671665933@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: kernelci.org bot Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Mateusz Jończyk Tested-by: Kelsey Steele Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5dff9ff999981..baddd8ed81868 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 85 +SUBLEVEL = 86 EXTRAVERSION = NAME = Curry Ramen -- GitLab From ca5962bdc53c86d3aa23fa619aa52be194f1e20a Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 6 Apr 2024 23:16:08 -0500 Subject: [PATCH 1679/2290] smb3: fix Open files on server counter going negative commit 28e0947651ce6a2200b9a7eceb93282e97d7e51a upstream. We were decrementing the count of open files on server twice for the case where we were closing cached directories. Fixes: 8e843bf38f7b ("cifs: return a single-use cfid if we did not get a lease") Cc: stable@vger.kernel.org Acked-by: Bharath SM Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index f4ad343b06c1f..2ca1881919c7b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -386,8 +386,8 @@ smb2_close_cached_fid(struct kref *ref) if (cfid->is_open) { rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - if (rc != -EBUSY && rc != -EAGAIN) - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc) /* should we retry on -EBUSY or -EAGAIN? */ + cifs_dbg(VFS, "close cached dir rc %d\n", rc); } free_cached_dir(cfid); -- GitLab From 0559b2d759be06bb92a0470d88d2f1207db20211 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 12 Apr 2024 08:41:15 +0900 Subject: [PATCH 1680/2290] ata: libata-scsi: Fix ata_scsi_dev_rescan() error path commit 79336504781e7fee5ddaf046dcc186c8dfdf60b1 upstream. Commit 0c76106cb975 ("scsi: sd: Fix TCG OPAL unlock on system resume") incorrectly handles failures of scsi_resume_device() in ata_scsi_dev_rescan(), leading to a double call to spin_unlock_irqrestore() to unlock a device port. Fix this by redefining the goto labels used in case of errors and only unlock the port scsi_scan_mutex when scsi_resume_device() fails. Bug found with the Smatch static checker warning: drivers/ata/libata-scsi.c:4774 ata_scsi_dev_rescan() error: double unlocked 'ap->lock' (orig line 4757) Reported-by: Dan Carpenter Fixes: 0c76106cb975 ("scsi: sd: Fix TCG OPAL unlock on system resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a09548630fc8b..65fde5717928b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4667,7 +4667,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) * bail out. */ if (ap->pflags & ATA_PFLAG_SUSPENDED) - goto unlock; + goto unlock_ap; if (!sdev) continue; @@ -4680,7 +4680,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (do_resume) { ret = scsi_resume_device(sdev); if (ret == -EWOULDBLOCK) - goto unlock; + goto unlock_scan; dev->flags &= ~ATA_DFLAG_RESUMING; } ret = scsi_rescan_device(sdev); @@ -4688,12 +4688,13 @@ void ata_scsi_dev_rescan(struct work_struct *work) spin_lock_irqsave(ap->lock, flags); if (ret) - goto unlock; + goto unlock_ap; } } -unlock: +unlock_ap: spin_unlock_irqrestore(ap->lock, flags); +unlock_scan: mutex_unlock(&ap->scsi_scan_mutex); /* Reschedule with a delay if scsi_rescan_device() returned an error */ -- GitLab From 3fe79b2c83461edbbf86ed8a6f3924820ff89259 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 12 Feb 2024 13:58:33 +0100 Subject: [PATCH 1681/2290] batman-adv: Avoid infinite loop trying to resize local TT commit b1f532a3b1e6d2e5559c7ace49322922637a28aa upstream. If the MTU of one of an attached interface becomes too small to transmit the local translation table then it must be resized to fit inside all fragments (when enabled) or a single packet. But if the MTU becomes too low to transmit even the header + the VLAN specific part then the resizing of the local TT will never succeed. This can for example happen when the usable space is 110 bytes and 11 VLANs are on top of batman-adv. In this case, at least 116 byte would be needed. There will just be an endless spam of batman_adv: batadv0: Forced to purge local tt entries to fit new maximum fragment MTU (110) in the log but the function will never finish. Problem here is that the timeout will be halved all the time and will then stagnate at 0 and therefore never be able to reduce the table even more. There are other scenarios possible with a similar result. The number of BATADV_TT_CLIENT_NOPURGE entries in the local TT can for example be too high to fit inside a packet. Such a scenario can therefore happen also with only a single VLAN + 7 non-purgable addresses - requiring at least 120 bytes. While this should be handled proactively when: * interface with too low MTU is added * VLAN is added * non-purgeable local mac is added * MTU of an attached interface is reduced * fragmentation setting gets disabled (which most likely requires dropping attached interfaces) not all of these scenarios can be prevented because batman-adv is only consuming events without the the possibility to prevent these actions (non-purgable MAC address added, MTU of an attached interface is reduced). It is therefore necessary to also make sure that the code is able to handle also the situations when there were already incompatible system configuration are present. Cc: stable@vger.kernel.org Fixes: a19d3d85e1b8 ("batman-adv: limit local translation table max size") Reported-by: syzbot+a6a4b5bb3da165594cff@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5d8cee74772fe..4fc66cd95dc47 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) spin_lock_bh(&bat_priv->tt.commit_lock); - while (true) { + while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; -- GitLab From 2d5f12de4cf589c85a0f33ef66f68d38b8f24970 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 9 Apr 2024 15:13:09 -0400 Subject: [PATCH 1682/2290] ring-buffer: Only update pages_touched when a new page is touched commit ffe3986fece696cf65e0ef99e74c75f848be8e30 upstream. The "buffer_percent" logic that is used by the ring buffer splice code to only wake up the tasks when there's no data after the buffer is filled to the percentage of the "buffer_percent" file is dependent on three variables that determine the amount of data that is in the ring buffer: 1) pages_read - incremented whenever a new sub-buffer is consumed 2) pages_lost - incremented every time a writer overwrites a sub-buffer 3) pages_touched - incremented when a write goes to a new sub-buffer The percentage is the calculation of: (pages_touched - (pages_lost + pages_read)) / nr_pages Basically, the amount of data is the total number of sub-bufs that have been touched, minus the number of sub-bufs lost and sub-bufs consumed. This is divided by the total count to give the buffer percentage. When the percentage is greater than the value in the "buffer_percent" file, it wakes up splice readers waiting for that amount. It was observed that over time, the amount read from the splice was constantly decreasing the longer the trace was running. That is, if one asked for 60%, it would read over 60% when it first starts tracing, but then it would be woken up at under 60% and would slowly decrease the amount of data read after being woken up, where the amount becomes much less than the buffer percent. This was due to an accounting of the pages_touched incrementation. This value is incremented whenever a writer transfers to a new sub-buffer. But the place where it was incremented was incorrect. If a writer overflowed the current sub-buffer it would go to the next one. If it gets preempted by an interrupt at that time, and the interrupt performs a trace, it too will end up going to the next sub-buffer. But only one should increment the counter. Unfortunately, that was not the case. Change the cmpxchg() that does the real switch of the tail-page into a try_cmpxchg(), and on success, perform the increment of pages_touched. This will only increment the counter once for when the writer moves to a new sub-buffer, and not when there's a race and is incremented for when a writer and its preempting writer both move to the same new sub-buffer. Link: https://lore.kernel.org/linux-trace-kernel/20240409151309.0d0e5056@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d2947de3021a9..337162e0c3d53 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1543,7 +1543,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); - local_inc(&cpu_buffer->pages_touched); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. @@ -1580,8 +1579,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - /* Again, either we update tail_page or an interrupt does */ - (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); + /* Either we update tail_page or an interrupt does */ + if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) + local_inc(&cpu_buffer->pages_touched); } } -- GitLab From 66fab1e120b39f8f47a94186ddee36006fc02ca8 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 2 Apr 2024 14:32:05 +0300 Subject: [PATCH 1683/2290] Bluetooth: Fix memory leak in hci_req_sync_complete() commit 45d355a926ab40f3ae7bc0b0a00cb0e3e8a5a810 upstream. In 'hci_req_sync_complete()', always free the previous sync request state before assigning reference to a new one. Reported-by: syzbot+39ec16ff6cc18b1d066d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=39ec16ff6cc18b1d066d Cc: stable@vger.kernel.org Fixes: f60cb30579d3 ("Bluetooth: Convert hci_req_sync family of function to new request API") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 4468647df6722..cf69e973b724f 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - if (skb) + if (skb) { + kfree_skb(hdev->req_skb); hdev->req_skb = skb_get(skb); + } wake_up_interruptible(&hdev->req_wait_q); } } -- GitLab From bd9b94055c3deb2398ee4490c1dfdf03f53efb8f Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 27 Mar 2024 13:10:37 +0800 Subject: [PATCH 1684/2290] drm/amd/pm: fixes a random hang in S4 for SMU v13.0.4/11 commit 31729e8c21ecfd671458e02b6511eb68c2225113 upstream. While doing multiple S4 stress tests, GC/RLC/PMFW get into an invalid state resulting into hard hangs. Adding a GFX reset as workaround just before sending the MP1_UNLOAD message avoids this failure. Signed-off-by: Tim Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 6d9760eac16d8..21b374d121819 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -222,8 +222,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) struct amdgpu_device *adev = smu->adev; int ret = 0; - if (!en && !adev->in_s0ix) + if (!en && !adev->in_s0ix) { + /* Adds a GFX reset as workaround just before sending the + * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering + * an invalid state. + */ + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_2, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + } return ret; } -- GitLab From bccc8d15509273c49cfb2541003febe1469fa680 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 8 Apr 2024 09:02:23 +0200 Subject: [PATCH 1685/2290] PM: s2idle: Make sure CPUs will wakeup directly on resume commit 3c89a068bfd0698a5478f4cf39493595ef757d5e upstream. s2idle works like a regular suspend with freezing processes and freezing devices. All CPUs except the control CPU go into idle. Once this is completed the control CPU kicks all other CPUs out of idle, so that they reenter the idle loop and then enter s2idle state. The control CPU then issues an swait() on the suspend state and therefore enters the idle loop as well. Due to being kicked out of idle, the other CPUs leave their NOHZ states, which means the tick is active and the corresponding hrtimer is programmed to the next jiffie. On entering s2idle the CPUs shut down their local clockevent device to prevent wakeups. The last CPU which enters s2idle shuts down its local clockevent and freezes timekeeping. On resume, one of the CPUs receives the wakeup interrupt, unfreezes timekeeping and its local clockevent and starts the resume process. At that point all other CPUs are still in s2idle with their clockevents switched off. They only resume when they are kicked by another CPU or after resuming devices and then receiving a device interrupt. That means there is no guarantee that all CPUs will wakeup directly on resume. As a consequence there is no guarantee that timers which are queued on those CPUs and should expire directly after resume, are handled. Also timer list timers which are remotely queued to one of those CPUs after resume will not result in a reprogramming IPI as the tick is active. Queueing a hrtimer will also not result in a reprogramming IPI because the first hrtimer event is already in the past. The recent introduction of the timer pull model (7ee988770326 ("timers: Implement the hierarchical pull model")) amplifies this problem, if the current migrator is one of the non woken up CPUs. When a non pinned timer list timer is queued and the queuing CPU goes idle, it relies on the still suspended migrator CPU to expire the timer which will happen by chance. The problem exists since commit 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path"). There the cpuidle_pause() call which in turn invoked a wakeup for all idle CPUs was moved to a later point in the resume process. This might not be reached or reached very late because it waits on a timer of a still suspended CPU. Address this by kicking all CPUs out of idle after the control CPU returns from swait() so that they resume their timers and restore consistent system state. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218641 Fixes: 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path") Signed-off-by: Anna-Maria Behnsen Reviewed-by: Thomas Gleixner Tested-by: Mario Limonciello Cc: 5.16+ # 5.16+ Acked-by: Peter Zijlstra (Intel) Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/suspend.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index a718067deecee..3aae526cc4aac 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,6 +106,12 @@ static void s2idle_enter(void) swait_event_exclusive(s2idle_wait_head, s2idle_state == S2IDLE_STATE_WAKE); + /* + * Kick all CPUs to ensure that they resume their timers and restore + * consistent system state. + */ + wake_up_all_idle_cpus(); + cpus_read_unlock(); raw_spin_lock_irq(&s2idle_lock); -- GitLab From 49054b3ed24954827c7691db78831a86da0cdb47 Mon Sep 17 00:00:00 2001 From: Nini Song Date: Thu, 25 Jan 2024 21:28:45 +0800 Subject: [PATCH 1686/2290] media: cec: core: remove length check of Timer Status commit ce5d241c3ad4568c12842168288993234345c0eb upstream. The valid_la is used to check the length requirements, including special cases of Timer Status. If the length is shorter than 5, that means no Duration Available is returned, the message will be forced to be invalid. However, the description of Duration Available in the spec is that this parameter may be returned when these cases, or that it can be optionally return when these cases. The key words in the spec description are flexible choices. Remove the special length check of Timer Status to fit the spec which is not compulsory about that. Signed-off-by: Nini Song Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/core/cec-adap.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 4bc2a705029e6..c761ac35e120d 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1121,20 +1121,6 @@ void cec_received_msg_ts(struct cec_adapter *adap, if (valid_la && min_len) { /* These messages have special length requirements */ switch (cmd) { - case CEC_MSG_TIMER_STATUS: - if (msg->msg[2] & 0x10) { - switch (msg->msg[2] & 0xf) { - case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE: - case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE: - if (msg->len < 5) - valid_la = false; - break; - } - } else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) { - if (msg->len < 5) - valid_la = false; - } - break; case CEC_MSG_RECORD_ON: switch (msg->msg[2]) { case CEC_OP_RECORD_SRC_OWN: -- GitLab From 24c0c5867ad279926a6eba411133eb962a1d55fb Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 22 Mar 2024 12:47:05 -0400 Subject: [PATCH 1687/2290] arm64: dts: imx8-ss-conn: fix usdhc wrong lpcg clock order [ Upstream commit c6ddd6e7b166532a0816825442ff60f70aed9647 ] The actual clock show wrong frequency: echo on >/sys/devices/platform/bus\@5b000000/5b010000.mmc/power/control cat /sys/kernel/debug/mmc0/ios clock: 200000000 Hz actual clock: 166000000 Hz ^^^^^^^^^ ..... According to sdhc0_lpcg: clock-controller@5b200000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x5b200000 0x10000>; #clock-cells = <1>; clocks = <&clk IMX_SC_R_SDHC_0 IMX_SC_PM_CLK_PER>, <&conn_ipg_clk>, <&conn_axi_clk>; clock-indices = , , ; clock-output-names = "sdhc0_lpcg_per_clk", "sdhc0_lpcg_ipg_clk", "sdhc0_lpcg_ahb_clk"; power-domains = <&pd IMX_SC_R_SDHC_0>; } "per_clk" should be IMX_LPCG_CLK_0 instead of IMX_LPCG_CLK_5. After correct clocks order: echo on >/sys/devices/platform/bus\@5b000000/5b010000.mmc/power/control cat /sys/kernel/debug/mmc0/ios clock: 200000000 Hz actual clock: 198000000 Hz ^^^^^^^^ ... Fixes: 16c4ea7501b1 ("arm64: dts: imx8: switch to new lpcg clock binding") Signed-off-by: Frank Li Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 10370d1a6c6de..dbb298b907c1c 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -38,8 +38,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>; + <&sdhc0_lpcg IMX_LPCG_CLK_5>, + <&sdhc0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; @@ -49,8 +49,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>; + <&sdhc1_lpcg IMX_LPCG_CLK_5>, + <&sdhc1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; @@ -62,8 +62,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>; + <&sdhc2_lpcg IMX_LPCG_CLK_5>, + <&sdhc2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; -- GitLab From 84fb60063509e462e39c0e097c7d6dbb71c95967 Mon Sep 17 00:00:00 2001 From: Alex Constantino Date: Thu, 4 Apr 2024 19:14:48 +0100 Subject: [PATCH 1688/2290] Revert "drm/qxl: simplify qxl_fence_wait" [ Upstream commit 07ed11afb68d94eadd4ffc082b97c2331307c5ea ] This reverts commit 5a838e5d5825c85556011478abde708251cc0776. Changes from commit 5a838e5d5825 ("drm/qxl: simplify qxl_fence_wait") would result in a '[TTM] Buffer eviction failed' exception whenever it reached a timeout. Due to a dependency to DMA_FENCE_WARN this also restores some code deleted by commit d72277b6c37d ("dma-buf: nuke DMA_FENCE_TRACE macros v2"). Fixes: 5a838e5d5825 ("drm/qxl: simplify qxl_fence_wait") Link: https://lore.kernel.org/regressions/ZTgydqRlK6WX_b29@eldamar.lan/ Reported-by: Timo Lindfors Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1054514 Signed-off-by: Alex Constantino Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240404181448.1643-2-dreaming.about.electric.sheep@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/qxl/qxl_release.c | 50 +++++++++++++++++++++++++++---- include/linux/dma-fence.h | 7 +++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 368d26da0d6a2..9febc8b73f09e 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct qxl_device *qdev; + struct qxl_release *release; + int count = 0, sc = 0; + bool have_drawable_releases; unsigned long cur, end = jiffies + timeout; qdev = container_of(fence->lock, struct qxl_device, release_lock); + release = container_of(fence, struct qxl_release, base); + have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE; - if (!wait_event_timeout(qdev->release_event, - (dma_fence_is_signaled(fence) || - (qxl_io_notify_oom(qdev), 0)), - timeout)) - return 0; +retry: + sc++; + + if (dma_fence_is_signaled(fence)) + goto signaled; + + qxl_io_notify_oom(qdev); + + for (count = 0; count < 11; count++) { + if (!qxl_queue_garbage_collect(qdev, true)) + break; + + if (dma_fence_is_signaled(fence)) + goto signaled; + } + + if (dma_fence_is_signaled(fence)) + goto signaled; + + if (have_drawable_releases || sc < 4) { + if (sc > 2) + /* back off */ + usleep_range(500, 1000); + + if (time_after(jiffies, end)) + return 0; + + if (have_drawable_releases && sc > 300) { + DMA_FENCE_WARN(fence, + "failed to wait on release %llu after spincount %d\n", + fence->context & ~0xf0000000, sc); + goto signaled; + } + goto retry; + } + /* + * yeah, original sync_obj_wait gave up after 3 spins when + * have_drawable_releases is not set. + */ +signaled: cur = jiffies; if (time_after(cur, end)) return 0; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index b79097b9070b3..5d6a5f3097cd0 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -659,4 +659,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence) return dma_fence_is_array(fence) || dma_fence_is_chain(fence); } +#define DMA_FENCE_WARN(f, fmt, args...) \ + do { \ + struct dma_fence *__ff = (f); \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ + ##args); \ + } while (0) + #endif /* __LINUX_DMA_FENCE_H */ -- GitLab From b7dc2e6b8798518ee69b1bcf67f0f8374af91dd2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Apr 2024 18:02:25 +0200 Subject: [PATCH 1689/2290] nouveau: fix function cast warning [ Upstream commit 185fdb4697cc9684a02f2fab0530ecdd0c2f15d4 ] Calling a function through an incompatible pointer type causes breaks kcfi, so clang warns about the assignment: drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c:73:10: error: cast from 'void (*)(const void *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 73 | .fini = (void(*)(void *))kfree, Avoid this with a trivial wrapper. Fixes: c39f472e9f14 ("drm/nouveau: remove symlinks, move core/ to nvkm/ (no code changes)") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240404160234.2923554-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c index 4bf486b571013..cb05f7f48a98b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) return ERR_PTR(-EINVAL); } +static void of_fini(void *p) +{ + kfree(p); +} + const struct nvbios_source nvbios_of = { .name = "OpenFirmware", .init = of_init, - .fini = (void(*)(void *))kfree, + .fini = of_fini, .read = of_read, .size = of_size, .rw = false, -- GitLab From 438b9a71b25abbf27a49c977200dcb708b0a1643 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Tue, 2 Apr 2024 11:55:13 +0800 Subject: [PATCH 1690/2290] scsi: hisi_sas: Modify the deadline for ata_wait_after_reset() [ Upstream commit 0098c55e0881f0b32591f2110410d5c8b7f9bd5a ] We found that the second parameter of function ata_wait_after_reset() is incorrectly used. We call smp_ata_check_ready_type() to poll the device type until the 30s timeout, so the correct deadline should be (jiffies + 30000). Fixes: 3c2673a09cf1 ("scsi: hisi_sas: Fix SATA devices missing issue during I_T nexus reset") Co-developed-by: xiabing Signed-off-by: xiabing Co-developed-by: Yihang Li Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/20240402035513.2024241-3-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 450a8578157cb..2116f5ee36e20 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1715,7 +1715,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) if (dev_is_sata(device)) { struct ata_link *link = &device->sata_dev.ap->link; - rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT, + rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT, smp_ata_check_ready_type); } else { msleep(2000); -- GitLab From 9fc74e367be4247a5ac39bb8ec41eaa73fade510 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Apr 2024 12:56:54 +0300 Subject: [PATCH 1691/2290] scsi: qla2xxx: Fix off by one in qla_edif_app_getstats() [ Upstream commit 4406e4176f47177f5e51b4cc7e6a7a2ff3dbfbbd ] The app_reply->elem[] array is allocated earlier in this function and it has app_req.num_ports elements. Thus this > comparison needs to be >= to prevent memory corruption. Fixes: 7878f22a2e03 ("scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/5c125b2f-92dd-412b-9b6f-fc3a3207bd60@moroto.mountain Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_edif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 7aee4d093969a..969008071decd 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -1058,7 +1058,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (fcport->edif.enable) { - if (pcnt > app_req.num_ports) + if (pcnt >= app_req.num_ports) break; app_reply->elem[pcnt].rekey_count = -- GitLab From 0b445005599d915991361b5f16957f45a0cd0425 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 3 Apr 2024 22:38:01 +0200 Subject: [PATCH 1692/2290] net: openvswitch: fix unwanted error log on timeout policy probing [ Upstream commit 4539f91f2a801c0c028c252bffae56030cfb2cae ] On startup, ovs-vswitchd probes different datapath features including support for timeout policies. While probing, it tries to execute certain operations with OVS_PACKET_ATTR_PROBE or OVS_FLOW_ATTR_PROBE attributes set. These attributes tell the openvswitch module to not log any errors when they occur as it is expected that some of the probes will fail. For some reason, setting the timeout policy ignores the PROBE attribute and logs a failure anyway. This is causing the following kernel log on each re-start of ovs-vswitchd: kernel: Failed to associated timeout policy `ovs_test_tp' Fix that by using the same logging macro that all other messages are using. The message will still be printed at info level when needed and will be rate limited, but with a net rate limiter instead of generic printk one. The nf_ct_set_timeout() itself will still print some info messages, but at least this change makes logging in openvswitch module more consistent. Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Signed-off-by: Ilya Maximets Acked-by: Eelco Chaudron Link: https://lore.kernel.org/r/20240403203803.2137962-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/conntrack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0591cfb289d50..e4ba86b84b9b1 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1711,8 +1711,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) - pr_info_ratelimited("Failed to associated timeout " - "policy `%s'\n", ct_info.timeout); + OVS_NLERR(log, + "Failed to associated timeout policy '%s'", + ct_info.timeout); else ct_info.nf_ct_timeout = rcu_dereference( nf_ct_timeout_find(ct_info.ct)->timeout); -- GitLab From ac1c10b4ebdf7b1f4211524030e089d8f9c64d52 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 4 Apr 2024 09:57:40 +0200 Subject: [PATCH 1693/2290] u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file [ Upstream commit 38a15d0a50e0a43778561a5861403851f0b0194c ] Fix bogus lockdep warnings if multiple u64_stats_sync variables are initialized in the same file. With CONFIG_LOCKDEP, seqcount_init() is a macro which declares: static struct lock_class_key __key; Since u64_stats_init() is a function (albeit an inline one), all calls within the same file end up using the same instance, effectively treating them all as a single lock-class. Fixes: 9464ca650008 ("net: make u64_stats_init() a function") Closes: https://lore.kernel.org/netdev/ea1567d9-ce66-45e6-8168-ac40a47d1821@roeck-us.net/ Signed-off-by: Petr Tesarik Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240404075740.30682-1-petr@tesarici.cz Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/u64_stats_sync.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 46040d66334a8..79c3bbaa7e13e 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p) p->v++; } -static inline void u64_stats_init(struct u64_stats_sync *syncp) -{ - seqcount_init(&syncp->seq); -} +#define u64_stats_init(syncp) \ + do { \ + struct u64_stats_sync *__s = (syncp); \ + seqcount_init(&__s->seq); \ + } while (0) static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { -- GitLab From 2a523f14a3f53b46ff0e1fafd215b0bc5f6783aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Apr 2024 20:27:38 +0000 Subject: [PATCH 1694/2290] xsk: validate user input for XDP_{UMEM|COMPLETION}_FILL_RING MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 237f3cf13b20db183d3706d997eedc3c49eacd44 ] syzbot reported an illegal copy in xsk_setsockopt() [1] Make sure to validate setsockopt() @optlen parameter. [1] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] BUG: KASAN: slab-out-of-bounds in xsk_setsockopt+0x909/0xa40 net/xdp/xsk.c:1420 Read of size 4 at addr ffff888028c6cde3 by task syz-executor.0/7549 CPU: 0 PID: 7549 Comm: syz-executor.0 Not tainted 6.8.0-syzkaller-08951-gfe46a7dd189e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] copy_from_sockptr include/linux/sockptr.h:55 [inline] xsk_setsockopt+0x909/0xa40 net/xdp/xsk.c:1420 do_sock_setsockopt+0x3af/0x720 net/socket.c:2311 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7fb40587de69 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb40665a0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007fb4059abf80 RCX: 00007fb40587de69 RDX: 0000000000000005 RSI: 000000000000011b RDI: 0000000000000006 RBP: 00007fb4058ca47a R08: 0000000000000002 R09: 0000000000000000 R10: 0000000020001980 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fb4059abf80 R15: 00007fff57ee4d08 Allocated by task 7549: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:370 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:3966 [inline] __kmalloc+0x233/0x4a0 mm/slub.c:3979 kmalloc include/linux/slab.h:632 [inline] __cgroup_bpf_run_filter_setsockopt+0xd2f/0x1040 kernel/bpf/cgroup.c:1869 do_sock_setsockopt+0x6b4/0x720 net/socket.c:2293 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 The buggy address belongs to the object at ffff888028c6cde0 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 1 bytes to the right of allocated 2-byte region [ffff888028c6cde0, ffff888028c6cde2) The buggy address belongs to the physical page: page:ffffea0000a31b00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888028c6c9c0 pfn:0x28c6c anon flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000800 ffff888014c41280 0000000000000000 dead000000000001 raw: ffff888028c6c9c0 0000000080800057 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112cc0(GFP_USER|__GFP_NOWARN|__GFP_NORETRY), pid 6648, tgid 6644 (syz-executor.0), ts 133906047828, free_ts 133859922223 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311 __alloc_pages+0x256/0x680 mm/page_alloc.c:4569 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page+0x5f/0x160 mm/slub.c:2175 allocate_slab mm/slub.c:2338 [inline] new_slab+0x84/0x2f0 mm/slub.c:2391 ___slab_alloc+0xc73/0x1260 mm/slub.c:3525 __slab_alloc mm/slub.c:3610 [inline] __slab_alloc_node mm/slub.c:3663 [inline] slab_alloc_node mm/slub.c:3835 [inline] __do_kmalloc_node mm/slub.c:3965 [inline] __kmalloc_node+0x2db/0x4e0 mm/slub.c:3973 kmalloc_node include/linux/slab.h:648 [inline] __vmalloc_area_node mm/vmalloc.c:3197 [inline] __vmalloc_node_range+0x5f9/0x14a0 mm/vmalloc.c:3392 __vmalloc_node mm/vmalloc.c:3457 [inline] vzalloc+0x79/0x90 mm/vmalloc.c:3530 bpf_check+0x260/0x19010 kernel/bpf/verifier.c:21162 bpf_prog_load+0x1667/0x20f0 kernel/bpf/syscall.c:2895 __sys_bpf+0x4ee/0x810 kernel/bpf/syscall.c:5631 __do_sys_bpf kernel/bpf/syscall.c:5738 [inline] __se_sys_bpf kernel/bpf/syscall.c:5736 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5736 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 page last free pid 6650 tgid 6647 stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1140 [inline] free_unref_page_prepare+0x95d/0xa80 mm/page_alloc.c:2346 free_unref_page_list+0x5a3/0x850 mm/page_alloc.c:2532 release_pages+0x2117/0x2400 mm/swap.c:1042 tlb_batch_pages_flush mm/mmu_gather.c:98 [inline] tlb_flush_mmu_free mm/mmu_gather.c:293 [inline] tlb_flush_mmu+0x34d/0x4e0 mm/mmu_gather.c:300 tlb_finish_mmu+0xd4/0x200 mm/mmu_gather.c:392 exit_mmap+0x4b6/0xd40 mm/mmap.c:3300 __mmput+0x115/0x3c0 kernel/fork.c:1345 exit_mm+0x220/0x310 kernel/exit.c:569 do_exit+0x99e/0x27e0 kernel/exit.c:865 do_group_exit+0x207/0x2c0 kernel/exit.c:1027 get_signal+0x176e/0x1850 kernel/signal.c:2907 arch_do_signal_or_restart+0x96/0x860 arch/x86/kernel/signal.c:310 exit_to_user_mode_loop kernel/entry/common.c:105 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:201 [inline] syscall_exit_to_user_mode+0xc9/0x360 kernel/entry/common.c:212 do_syscall_64+0x10a/0x240 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Memory state around the buggy address: ffff888028c6cc80: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc ffff888028c6cd00: fa fc fc fc fa fc fc fc 00 fc fc fc 06 fc fc fc >ffff888028c6cd80: fa fc fc fc fa fc fc fc fa fc fc fc 02 fc fc fc ^ ffff888028c6ce00: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc ffff888028c6ce80: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc Fixes: 423f38329d26 ("xsk: add umem fill queue support and mmap") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: "Björn Töpel" Cc: Magnus Karlsson Cc: Maciej Fijalkowski Cc: Jonathan Lemon Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20240404202738.3634547-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/xdp/xsk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5c8e02d56fd43..e3bdfc517424d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1127,6 +1127,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_queue **q; int entries; + if (optlen < sizeof(entries)) + return -EINVAL; if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; -- GitLab From 4a1b65d1e55d53b397cb27014208be1e04172670 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Apr 2024 10:30:34 +0000 Subject: [PATCH 1695/2290] geneve: fix header validation in geneve[6]_xmit_skb [ Upstream commit d8a6213d70accb403b82924a1c229e733433a5ef ] syzbot is able to trigger an uninit-value in geneve_xmit() [1] Problem : While most ip tunnel helpers (like ip_tunnel_get_dsfield()) uses skb_protocol(skb, true), pskb_inet_may_pull() is only using skb->protocol. If anything else than ETH_P_IPV6 or ETH_P_IP is found in skb->protocol, pskb_inet_may_pull() does nothing at all. If a vlan tag was provided by the caller (af_packet in the syzbot case), the network header might not point to the correct location, and skb linear part could be smaller than expected. Add skb_vlan_inet_prepare() to perform a complete mac validation. Use this in geneve for the moment, I suspect we need to adopt this more broadly. v4 - Jakub reported v3 broke l2_tos_ttl_inherit.sh selftest - Only call __vlan_get_protocol() for vlan types. Link: https://lore.kernel.org/netdev/20240404100035.3270a7d5@kernel.org/ v2,v3 - Addressed Sabrina comments on v1 and v2 Link: https://lore.kernel.org/netdev/Zg1l9L2BNoZWZDZG@hog/ [1] BUG: KMSAN: uninit-value in geneve_xmit_skb drivers/net/geneve.c:910 [inline] BUG: KMSAN: uninit-value in geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030 geneve_xmit_skb drivers/net/geneve.c:910 [inline] geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030 __netdev_start_xmit include/linux/netdevice.h:4903 [inline] netdev_start_xmit include/linux/netdevice.h:4917 [inline] xmit_one net/core/dev.c:3531 [inline] dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3547 __dev_queue_xmit+0x348d/0x52c0 net/core/dev.c:4335 dev_queue_xmit include/linux/netdevice.h:3091 [inline] packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8bb0/0x9ef0 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was created at: slab_post_alloc_hook mm/slub.c:3804 [inline] slab_alloc_node mm/slub.c:3845 [inline] kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577 __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1318 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x722d/0x9ef0 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 CPU: 0 PID: 5033 Comm: syz-executor346 Not tainted 6.9.0-rc1-syzkaller-00005-g928a87efa423 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Fixes: d13f048dd40e ("net: geneve: modify IP header check in geneve6_xmit_skb and geneve_xmit_skb") Reported-by: syzbot+9ee20ec1de7b3168db09@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/000000000000d19c3a06152f9ee4@google.com/ Signed-off-by: Eric Dumazet Cc: Phillip Potter Cc: Sabrina Dubroca Reviewed-by: Sabrina Dubroca Reviewed-by: Phillip Potter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 4 ++-- include/net/ip_tunnels.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3f8da6f0b25ce..488ca1c854962 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -930,7 +930,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -1028,7 +1028,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bca80522f95c8..f9906b73e7ff4 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -351,6 +351,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) return pskb_network_may_pull(skb, nhlen); } +/* Variant of pskb_inet_may_pull(). + */ +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +{ + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. + */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; -- GitLab From ecedcd7e398542c3e594605de9b44ac8981f17c2 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Fri, 5 Apr 2024 16:55:13 -0700 Subject: [PATCH 1696/2290] bnxt_en: Reset PTP tx_avail after possible firmware reset [ Upstream commit faa12ca245585379d612736a4b5e98e88481ea59 ] It is possible that during error recovery and firmware reset, there is a pending TX PTP packet waiting for the timestamp. We need to reset this condition so that after recovery, the tx_avail count for PTP is reset back to the initial value. Otherwise, we may not accept any PTP TX timestamps after recovery. Fixes: 118612d519d8 ("bnxt_en: Add PTP clock APIs, ioctls, and ethtool methods") Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f810b5dc25f01..0d0aad7141c15 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10564,6 +10564,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); + if (bp->ptp_cfg) + atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); return 0; -- GitLab From be033154523f554cf1d3af9e6bbe41b756fab2ff Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 Apr 2024 22:30:39 +0200 Subject: [PATCH 1697/2290] net: ks8851: Inline ks8851_rx_skb() [ Upstream commit f96f700449b6d190e06272f1cf732ae8e45b73df ] Both ks8851_rx_skb_par() and ks8851_rx_skb_spi() call netif_rx(skb), inline the netif_rx(skb) call directly into ks8851_common.c and drop the .rx_skb callback and ks8851_rx_skb() wrapper. This removes one indirect call from the driver, no functional change otherwise. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20240405203204.82062-1-marex@denx.de Signed-off-by: Jakub Kicinski Stable-dep-of: be0384bf599c ("net: ks8851: Handle softirqs at the end of IRQ thread to fix hang") Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851.h | 3 --- drivers/net/ethernet/micrel/ks8851_common.c | 12 +----------- drivers/net/ethernet/micrel/ks8851_par.c | 11 ----------- drivers/net/ethernet/micrel/ks8851_spi.c | 11 ----------- 4 files changed, 1 insertion(+), 36 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index e5ec0a363aff8..31f75b4a67fd7 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -368,7 +368,6 @@ union ks8851_tx_hdr { * @rdfifo: FIFO read callback * @wrfifo: FIFO write callback * @start_xmit: start_xmit() implementation callback - * @rx_skb: rx_skb() implementation callback * @flush_tx_work: flush_tx_work() implementation callback * * The @statelock is used to protect information in the structure which may @@ -423,8 +422,6 @@ struct ks8851_net { struct sk_buff *txp, bool irq); netdev_tx_t (*start_xmit)(struct sk_buff *skb, struct net_device *dev); - void (*rx_skb)(struct ks8851_net *ks, - struct sk_buff *skb); void (*flush_tx_work)(struct ks8851_net *ks); }; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 0bf13b38b8f5b..896d43bb8883d 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]); } -/** - * ks8851_rx_skb - receive skbuff - * @ks: The device state. - * @skb: The skbuff - */ -static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb) -{ - ks->rx_skb(ks, skb); -} - /** * ks8851_rx_pkts - receive packets from the host * @ks: The device information. @@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - ks8851_rx_skb(ks, skb); + netif_rx(skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 7f49042484bdc..96fb0ffcedb90 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp, iowrite16_rep(ksp->hw_addr, txp->data, len / 2); } -/** - * ks8851_rx_skb_par - receive skbuff - * @ks: The device state. - * @skb: The skbuff - */ -static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb) -{ - netif_rx(skb); -} - static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks) { return ks8851_rdreg16_par(ks, KS_TXQCR); @@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev) ks->rdfifo = ks8851_rdfifo_par; ks->wrfifo = ks8851_wrfifo_par; ks->start_xmit = ks8851_start_xmit_par; - ks->rx_skb = ks8851_rx_skb_par; #define STD_IRQ (IRQ_LCI | /* Link Change */ \ IRQ_RXI | /* RX done */ \ diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 88e26c120b483..4dcbff789b19d 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len) return ALIGN(len + 4, 4); } -/** - * ks8851_rx_skb_spi - receive skbuff - * @ks: The device state - * @skb: The skbuff - */ -static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) -{ - netif_rx(skb); -} - /** * ks8851_tx_work - process tx packet(s) * @work: The work strucutre what was scheduled. @@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi) ks->rdfifo = ks8851_rdfifo_spi; ks->wrfifo = ks8851_wrfifo_spi; ks->start_xmit = ks8851_start_xmit_spi; - ks->rx_skb = ks8851_rx_skb_spi; ks->flush_tx_work = ks8851_flush_tx_work_spi; #define STD_IRQ (IRQ_LCI | /* Link Change */ \ -- GitLab From 492337a4fbd1421b42df684ee9b34be2a2722540 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 Apr 2024 22:30:40 +0200 Subject: [PATCH 1698/2290] net: ks8851: Handle softirqs at the end of IRQ thread to fix hang [ Upstream commit be0384bf599cf1eb8d337517feeb732d71f75a6f ] The ks8851_irq() thread may call ks8851_rx_pkts() in case there are any packets in the MAC FIFO, which calls netif_rx(). This netif_rx() implementation is guarded by local_bh_disable() and local_bh_enable(). The local_bh_enable() may call do_softirq() to run softirqs in case any are pending. One of the softirqs is net_rx_action, which ultimately reaches the driver .start_xmit callback. If that happens, the system hangs. The entire call chain is below: ks8851_start_xmit_par from netdev_start_xmit netdev_start_xmit from dev_hard_start_xmit dev_hard_start_xmit from sch_direct_xmit sch_direct_xmit from __dev_queue_xmit __dev_queue_xmit from __neigh_update __neigh_update from neigh_update neigh_update from arp_process.constprop.0 arp_process.constprop.0 from __netif_receive_skb_one_core __netif_receive_skb_one_core from process_backlog process_backlog from __napi_poll.constprop.0 __napi_poll.constprop.0 from net_rx_action net_rx_action from __do_softirq __do_softirq from call_with_stack call_with_stack from do_softirq do_softirq from __local_bh_enable_ip __local_bh_enable_ip from netif_rx netif_rx from ks8851_irq ks8851_irq from irq_thread_fn irq_thread_fn from irq_thread irq_thread from kthread kthread from ret_from_fork The hang happens because ks8851_irq() first locks a spinlock in ks8851_par.c ks8851_lock_par() spin_lock_irqsave(&ksp->lock, ...) and with that spinlock locked, calls netif_rx(). Once the execution reaches ks8851_start_xmit_par(), it calls ks8851_lock_par() again which attempts to claim the already locked spinlock again, and the hang happens. Move the do_softirq() call outside of the spinlock protected section of ks8851_irq() by disabling BHs around the entire spinlock protected section of ks8851_irq() handler. Place local_bh_enable() outside of the spinlock protected section, so that it can trigger do_softirq() without the ks8851_par.c ks8851_lock_par() spinlock being held, and safely call ks8851_start_xmit_par() without attempting to lock the already locked spinlock. Since ks8851_irq() is protected by local_bh_disable()/local_bh_enable() now, replace netif_rx() with __netif_rx() which is not duplicating the local_bh_disable()/local_bh_enable() calls. Fixes: 797047f875b5 ("net: ks8851: Implement Parallel bus operations") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20240405203204.82062-2-marex@denx.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 896d43bb8883d..d4cdf3d4f5525 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -299,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - netif_rx(skb); + __netif_rx(skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; @@ -330,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) unsigned long flags; unsigned int status; + local_bh_disable(); + ks8851_lock(ks, &flags); status = ks8851_rdreg16(ks, KS_ISR); @@ -406,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); + local_bh_enable(); + return IRQ_HANDLED; } -- GitLab From 84a352b7eba1142a95441380058985ff19f25ec9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 5 Apr 2024 15:10:57 -0700 Subject: [PATCH 1699/2290] af_unix: Clear stale u->oob_skb. [ Upstream commit b46f4eaa4f0ec38909fb0072eea3aeddb32f954e ] syzkaller started to report deadlock of unix_gc_lock after commit 4090fa373f0e ("af_unix: Replace garbage collection algorithm."), but it just uncovers the bug that has been there since commit 314001f0bf92 ("af_unix: Add OOB support"). The repro basically does the following. from socket import * from array import array c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) c1.sendmsg([b'a'], [(SOL_SOCKET, SCM_RIGHTS, array("i", [c2.fileno()]))], MSG_OOB) c2.recv(1) # blocked as no normal data in recv queue c2.close() # done async and unblock recv() c1.close() # done async and trigger GC A socket sends its file descriptor to itself as OOB data and tries to receive normal data, but finally recv() fails due to async close(). The problem here is wrong handling of OOB skb in manage_oob(). When recvmsg() is called without MSG_OOB, manage_oob() is called to check if the peeked skb is OOB skb. In such a case, manage_oob() pops it out of the receive queue but does not clear unix_sock(sk)->oob_skb. This is wrong in terms of uAPI. Let's say we send "hello" with MSG_OOB, and "world" without MSG_OOB. The 'o' is handled as OOB data. When recv() is called twice without MSG_OOB, the OOB data should be lost. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM, 0) >>> c1.send(b'hello', MSG_OOB) # 'o' is OOB data 5 >>> c1.send(b'world') 5 >>> c2.recv(5) # OOB data is not received b'hell' >>> c2.recv(5) # OOB date is skipped b'world' >>> c2.recv(5, MSG_OOB) # This should return an error b'o' In the same situation, TCP actually returns -EINVAL for the last recv(). Also, if we do not clear unix_sk(sk)->oob_skb, unix_poll() always set EPOLLPRI even though the data has passed through by previous recv(). To avoid these issues, we must clear unix_sk(sk)->oob_skb when dequeuing it from recv queue. The reason why the old GC did not trigger the deadlock is because the old GC relied on the receive queue to detect the loop. When it is triggered, the socket with OOB data is marked as GC candidate because file refcount == inflight count (1). However, after traversing all inflight sockets, the socket still has a positive inflight count (1), thus the socket is excluded from candidates. Then, the old GC lose the chance to garbage-collect the socket. With the old GC, the repro continues to create true garbage that will never be freed nor detected by kmemleak as it's linked to the global inflight list. That's why we couldn't even notice the issue. Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: syzbot+7f7f201cc2668a8fd169@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7f7f201cc2668a8fd169 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240405221057.2406-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e1af94393789f..373530303ad19 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2677,7 +2677,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } } else if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) + kfree_skb(skb); skb = skb_peek(&sk->sk_receive_queue); } } -- GitLab From 7e33f68791eb84c4f2457855fecfc592710accc8 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Mon, 8 Apr 2024 12:06:43 +0530 Subject: [PATCH 1700/2290] octeontx2-af: Fix NIX SQ mode and BP config [ Upstream commit faf23006185e777db18912685922c5ddb2df383f ] NIX SQ mode and link backpressure configuration is required for all platforms. But in current driver this code is wrongly placed under specific platform check. This patch fixes the issue by moving the code out of platform check. Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology") Signed-off-by: Geetha sowjanya Link: https://lore.kernel.org/r/20240408063643.26288-1-gakula@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index bb99302eab67a..67080d5053e07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4237,18 +4237,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) */ rvu_write64(rvu, blkaddr, NIX_AF_CFG, rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL); + } - /* Set chan/link to backpressure TL3 instead of TL2 */ - rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); + /* Set chan/link to backpressure TL3 instead of TL2 */ + rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); - /* Disable SQ manager's sticky mode operation (set TM6 = 0) - * This sticky mode is known to cause SQ stalls when multiple - * SQs are mapped to same SMQ and transmitting pkts at a time. - */ - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); - cfg &= ~BIT_ULL(15); - rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); - } + /* Disable SQ manager's sticky mode operation (set TM6 = 0) + * This sticky mode is known to cause SQ stalls when multiple + * SQs are mapped to same SMQ and transmitting pkts at a time. + */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); + cfg &= ~BIT_ULL(15); + rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); ltdefs = rvu->kpu.lt_def; /* Calibrate X2P bus to check if CGX/LBK links are fine */ -- GitLab From 2c46877f5f935099604c53c9ad3118bae6770108 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2024 09:42:02 +0200 Subject: [PATCH 1701/2290] ipv6: fib: hide unused 'pn' variable [ Upstream commit 74043489fcb5e5ca4074133582b5b8011b67f9e7 ] When CONFIG_IPV6_SUBTREES is disabled, the only user is hidden, causing a 'make W=1' warning: net/ipv6/ip6_fib.c: In function 'fib6_add': net/ipv6/ip6_fib.c:1388:32: error: variable 'pn' set but not used [-Werror=unused-but-set-variable] Add another #ifdef around the variable declaration, matching the other uses in this file. Fixes: 66729e18df08 ("[IPV6] ROUTE: Make sure we have fn->leaf when adding a node on subtree.") Link: https://lore.kernel.org/netdev/20240322131746.904943-1-arnd@kernel.org/ Reviewed-by: David Ahern Signed-off-by: Arnd Bergmann Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240408074219.3030256-1-arnd@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/ip6_fib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e606374854ce5..8213626434b91 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1376,7 +1376,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; +#ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; +#endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; @@ -1400,9 +1403,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, goto out; } +#ifdef CONFIG_IPV6_SUBTREES pn = fn; -#ifdef CONFIG_IPV6_SUBTREES if (rt->fib6_src.plen) { struct fib6_node *sn; -- GitLab From 03d564999fa8a5ed01965628c39f70a69be19485 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2024 09:42:03 +0200 Subject: [PATCH 1702/2290] ipv4/route: avoid unused-but-set-variable warning [ Upstream commit cf1b7201df59fb936f40f4a807433fe3f2ce310a ] The log_martians variable is only used in an #ifdef, causing a 'make W=1' warning with gcc: net/ipv4/route.c: In function 'ip_rt_send_redirect': net/ipv4/route.c:880:13: error: variable 'log_martians' set but not used [-Werror=unused-but-set-variable] Change the #ifdef to an equivalent IS_ENABLED() to let the compiler see where the variable is used. Fixes: 30038fc61adf ("net: ip_rt_send_redirect() optimization") Reviewed-by: David Ahern Signed-off-by: Arnd Bergmann Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240408074219.3030256-2-arnd@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 474f391fab35d..a0c687ff25987 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (log_martians && + if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); -#endif } out_put_peer: inet_putpeer(peer); -- GitLab From de76ae9ea1a6cf9e77fcec4f2df2904e26c23ceb Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 8 Apr 2024 16:18:21 +0200 Subject: [PATCH 1703/2290] ipv6: fix race condition between ipv6_get_ifaddr and ipv6_del_addr [ Upstream commit 7633c4da919ad51164acbf1aa322cc1a3ead6129 ] Although ipv6_get_ifaddr walks inet6_addr_lst under the RCU lock, it still means hlist_for_each_entry_rcu can return an item that got removed from the list. The memory itself of such item is not freed thanks to RCU but nothing guarantees the actual content of the memory is sane. In particular, the reference count can be zero. This can happen if ipv6_del_addr is called in parallel. ipv6_del_addr removes the entry from inet6_addr_lst (hlist_del_init_rcu(&ifp->addr_lst)) and drops all references (__in6_ifa_put(ifp) + in6_ifa_put(ifp)). With bad enough timing, this can happen: 1. In ipv6_get_ifaddr, hlist_for_each_entry_rcu returns an entry. 2. Then, the whole ipv6_del_addr is executed for the given entry. The reference count drops to zero and kfree_rcu is scheduled. 3. ipv6_get_ifaddr continues and tries to increments the reference count (in6_ifa_hold). 4. The rcu is unlocked and the entry is freed. 5. The freed entry is returned. Prevent increasing of the reference count in such case. The name in6_ifa_hold_safe is chosen to mimic the existing fib6_info_hold_safe. [ 41.506330] refcount_t: addition on 0; use-after-free. [ 41.506760] WARNING: CPU: 0 PID: 595 at lib/refcount.c:25 refcount_warn_saturate+0xa5/0x130 [ 41.507413] Modules linked in: veth bridge stp llc [ 41.507821] CPU: 0 PID: 595 Comm: python3 Not tainted 6.9.0-rc2.main-00208-g49563be82afa #14 [ 41.508479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) [ 41.509163] RIP: 0010:refcount_warn_saturate+0xa5/0x130 [ 41.509586] Code: ad ff 90 0f 0b 90 90 c3 cc cc cc cc 80 3d c0 30 ad 01 00 75 a0 c6 05 b7 30 ad 01 01 90 48 c7 c7 38 cc 7a 8c e8 cc 18 ad ff 90 <0f> 0b 90 90 c3 cc cc cc cc 80 3d 98 30 ad 01 00 0f 85 75 ff ff ff [ 41.510956] RSP: 0018:ffffbda3c026baf0 EFLAGS: 00010282 [ 41.511368] RAX: 0000000000000000 RBX: ffff9e9c46914800 RCX: 0000000000000000 [ 41.511910] RDX: ffff9e9c7ec29c00 RSI: ffff9e9c7ec1c900 RDI: ffff9e9c7ec1c900 [ 41.512445] RBP: ffff9e9c43660c9c R08: 0000000000009ffb R09: 00000000ffffdfff [ 41.512998] R10: 00000000ffffdfff R11: ffffffff8ca58a40 R12: ffff9e9c4339a000 [ 41.513534] R13: 0000000000000001 R14: ffff9e9c438a0000 R15: ffffbda3c026bb48 [ 41.514086] FS: 00007fbc4cda1740(0000) GS:ffff9e9c7ec00000(0000) knlGS:0000000000000000 [ 41.514726] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 41.515176] CR2: 000056233b337d88 CR3: 000000000376e006 CR4: 0000000000370ef0 [ 41.515713] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 41.516252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 41.516799] Call Trace: [ 41.517037] [ 41.517249] ? __warn+0x7b/0x120 [ 41.517535] ? refcount_warn_saturate+0xa5/0x130 [ 41.517923] ? report_bug+0x164/0x190 [ 41.518240] ? handle_bug+0x3d/0x70 [ 41.518541] ? exc_invalid_op+0x17/0x70 [ 41.520972] ? asm_exc_invalid_op+0x1a/0x20 [ 41.521325] ? refcount_warn_saturate+0xa5/0x130 [ 41.521708] ipv6_get_ifaddr+0xda/0xe0 [ 41.522035] inet6_rtm_getaddr+0x342/0x3f0 [ 41.522376] ? __pfx_inet6_rtm_getaddr+0x10/0x10 [ 41.522758] rtnetlink_rcv_msg+0x334/0x3d0 [ 41.523102] ? netlink_unicast+0x30f/0x390 [ 41.523445] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 41.523832] netlink_rcv_skb+0x53/0x100 [ 41.524157] netlink_unicast+0x23b/0x390 [ 41.524484] netlink_sendmsg+0x1f2/0x440 [ 41.524826] __sys_sendto+0x1d8/0x1f0 [ 41.525145] __x64_sys_sendto+0x1f/0x30 [ 41.525467] do_syscall_64+0xa5/0x1b0 [ 41.525794] entry_SYSCALL_64_after_hwframe+0x72/0x7a [ 41.526213] RIP: 0033:0x7fbc4cfcea9a [ 41.526528] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 [ 41.527942] RSP: 002b:00007ffcf54012a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 41.528593] RAX: ffffffffffffffda RBX: 00007ffcf5401368 RCX: 00007fbc4cfcea9a [ 41.529173] RDX: 000000000000002c RSI: 00007fbc4b9d9bd0 RDI: 0000000000000005 [ 41.529786] RBP: 00007fbc4bafb040 R08: 00007ffcf54013e0 R09: 000000000000000c [ 41.530375] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 41.530977] R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007fbc4ca85d1b [ 41.531573] Fixes: 5c578aedcb21d ("IPv6: convert addrconf hash list to RCU") Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jiri Benc Link: https://lore.kernel.org/r/8ab821e36073a4a406c50ec83c9e8dc586c539e4.1712585809.git.jbenc@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/addrconf.h | 4 ++++ net/ipv6/addrconf.c | 7 ++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 86eb2aba1479c..5bcc63eade035 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -437,6 +437,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) refcount_inc(&ifp->refcnt); } +static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) +{ + return refcount_inc_not_zero(&ifp->refcnt); +} /* * compute link-local solicited-node multicast address diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1648373692a99..3866deaadbb66 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2050,9 +2050,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - result = ifp; - in6_ifa_hold(ifp); - break; + if (in6_ifa_hold_safe(ifp)) { + result = ifp; + break; + } } } } -- GitLab From 7bc65d23ba20dcd7ecc094a12c181e594e5eb315 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 15:41:52 -0400 Subject: [PATCH 1704/2290] Bluetooth: SCO: Fix not validating setsockopt user input [ Upstream commit 51eda36d33e43201e7a4fd35232e069b2c850b01 ] syzbot reported sco_sock_setsockopt() is copying data without checking user input length. BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] BUG: KASAN: slab-out-of-bounds in sco_sock_setsockopt+0xc0b/0xf90 net/bluetooth/sco.c:893 Read of size 4 at addr ffff88805f7b15a3 by task syz-executor.5/12578 Fixes: ad10b1a48754 ("Bluetooth: Add Bluetooth socket voice option") Fixes: b96e9c671b05 ("Bluetooth: Add BT_DEFER_SETUP option to sco socket") Fixes: 00398e1d5183 ("Bluetooth: Add support for BT_PKT_STATUS CMSG data for SCO connections") Fixes: f6873401a608 ("Bluetooth: Allow setting of codec for HFP offload use case") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/bluetooth.h | 9 +++++++++ net/bluetooth/sco.c | 23 ++++++++++------------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bcc5a4cd2c17b..5aaf7d7f3c6fa 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -565,6 +565,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, return skb; } +static inline int bt_copy_from_sockptr(void *dst, size_t dst_size, + sockptr_t src, size_t src_size) +{ + if (dst_size > src_size) + return -EINVAL; + + return copy_from_sockptr(dst, src, dst_size); +} + int bt_to_errno(u16 code); __u8 bt_status(int err); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 6d4168cfeb563..2e9137c539a49 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -831,7 +831,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; - int len, err = 0; + int err = 0; struct bt_voice voice; u32 opt; struct bt_codecs *codecs; @@ -850,10 +850,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -870,11 +869,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, voice.setting = sco_pi(sk)->setting; - len = min_t(unsigned int, sizeof(voice), optlen); - if (copy_from_sockptr(&voice, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&voice, sizeof(voice), optval, + optlen); + if (err) break; - } /* Explicitly check for these values */ if (voice.setting != BT_VOICE_TRANSPARENT && @@ -897,10 +895,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_PKT_STATUS: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS; @@ -941,9 +938,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(buffer, optval, optlen)) { + err = bt_copy_from_sockptr(buffer, optlen, optval, optlen); + if (err) { hci_dev_put(hdev); - err = -EFAULT; break; } -- GitLab From 9d42f373391211c7c8af66a3a316533a32b8a607 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 15:50:47 -0400 Subject: [PATCH 1705/2290] Bluetooth: L2CAP: Fix not validating setsockopt user input [ Upstream commit 4f3951242ace5efc7131932e2e01e6ac6baed846 ] Check user input length before copying data. Fixes: 33575df7be67 ("Bluetooth: move l2cap_sock_setsockopt() to l2cap_sock.c") Fixes: 3ee7b7cd8390 ("Bluetooth: Add BT_MODE socket option") Signed-off-by: Eric Dumazet Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_sock.c | 52 +++++++++++++++----------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 947ca580bb9a2..4198ca66fbe10 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -745,7 +745,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; - int len, err = 0; + int err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -772,11 +772,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_sockptr(&opts, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen); + if (err) break; - } if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) { err = -EINVAL; @@ -819,10 +817,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; case L2CAP_LM: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt & L2CAP_LM_FIPS) { err = -EINVAL; @@ -903,7 +900,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, struct bt_security sec; struct bt_power pwr; struct l2cap_conn *conn; - int len, err = 0; + int err = 0; u32 opt; u16 mtu; u8 mode; @@ -929,11 +926,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_sockptr(&sec, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); + if (err) break; - } if (sec.level < BT_SECURITY_LOW || sec.level > BT_SECURITY_FIPS) { @@ -978,10 +973,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) { set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -993,10 +987,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_FLUSHABLE: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_FLUSHABLE_ON) { err = -EINVAL; @@ -1028,11 +1021,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, pwr.force_active = BT_POWER_FORCE_ACTIVE_ON; - len = min_t(unsigned int, sizeof(pwr), optlen); - if (copy_from_sockptr(&pwr, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen); + if (err) break; - } if (pwr.force_active) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1041,10 +1032,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) { err = -EINVAL; @@ -1089,10 +1079,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mtu, optval, sizeof(u16))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) break; - } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && sk->sk_state == BT_CONNECTED) @@ -1120,10 +1109,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mode, optval, sizeof(u8))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen); + if (err) break; - } BT_DBG("mode %u", mode); -- GitLab From c760089aa98289b4b88a7ff5a62dd92845adf223 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Apr 2024 12:07:41 +0000 Subject: [PATCH 1706/2290] netfilter: complete validation of user input [ Upstream commit 65acf6e0501ac8880a4f73980d01b5d27648b956 ] In my recent commit, I missed that do_replace() handlers use copy_from_sockptr() (which I fixed), followed by unsafe copy_from_sockptr_offset() calls. In all functions, we can perform the @optlen validation before even calling xt_alloc_table_info() with the following check: if ((u64)optlen < (u64)tmp.size + sizeof(tmp)) return -EINVAL; Fixes: 0c83842df40f ("netfilter: validate user input for expected length") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20240409120741.3538135-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b150c9929b12e..14365b20f1c5c 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1f365e28e316c..a6208efcfccfc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1120,6 +1120,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1506,6 +1508,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 37a2b3301e423..b844e519da1b4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1137,6 +1137,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1515,6 +1517,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; -- GitLab From 2e8dc5cffc844dacfa79f056dea88002312f253f Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 9 Apr 2024 22:08:12 +0300 Subject: [PATCH 1707/2290] net/mlx5: Properly link new fs rules into the tree [ Upstream commit 7c6782ad4911cbee874e85630226ed389ff2e453 ] Previously, add_rule_fg would only add newly created rules from the handle into the tree when they had a refcount of 1. On the other hand, create_flow_handle tries hard to find and reference already existing identical rules instead of creating new ones. These two behaviors can result in a situation where create_flow_handle 1) creates a new rule and references it, then 2) in a subsequent step during the same handle creation references it again, resulting in a rule with a refcount of 2 that is not linked into the tree, will have a NULL parent and root and will result in a crash when the flow group is deleted because del_sw_hw_rule, invoked on rule deletion, assumes node->parent is != NULL. This happened in the wild, due to another bug related to incorrect handling of duplicate pkt_reformat ids, which lead to the code in create_flow_handle incorrectly referencing a just-added rule in the same flow handle, resulting in the problem described above. Full details are at [1]. This patch changes add_rule_fg to add new rules without parents into the tree, properly initializing them and avoiding the crash. This makes it more consistent with how rules are added to an FTE in create_flow_handle. Fixes: 74491de93712 ("net/mlx5: Add multi dest support") Link: https://lore.kernel.org/netdev/ea5264d6-6b55-4449-a602-214c6f509c1e@163.com/T/#u [1] Signed-off-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240409190820.227554-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e6674118bc428..164e10b5f9b7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1752,8 +1752,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } trace_mlx5_fs_set_fte(fte, false); + /* Link newly added rules into the tree. */ for (i = 0; i < handle->num_rules; i++) { - if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + if (!handle->rule[i]->node.parent) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } -- GitLab From ad26f26abd353113dea4e8d5ebadccdab9b61e76 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 9 Apr 2024 22:08:15 +0300 Subject: [PATCH 1708/2290] net/mlx5e: Fix mlx5e_priv_init() cleanup flow [ Upstream commit ecb829459a841198e142f72fadab56424ae96519 ] When mlx5e_priv_init() fails, the cleanup flow calls mlx5e_selq_cleanup which calls mlx5e_selq_apply() that assures that the `priv->state_lock` is held using lockdep_is_held(). Acquire the state_lock in mlx5e_selq_cleanup(). Kernel log: ============================= WARNING: suspicious RCU usage 6.8.0-rc3_net_next_841a9b5 #1 Not tainted ----------------------------- drivers/net/ethernet/mellanox/mlx5/core/en/selq.c:124 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by systemd-modules/293: #0: ffffffffa05067b0 (devices_rwsem){++++}-{3:3}, at: ib_register_client+0x109/0x1b0 [ib_core] #1: ffff8881096c65c0 (&device->client_data_rwsem){++++}-{3:3}, at: add_client_context+0x104/0x1c0 [ib_core] stack backtrace: CPU: 4 PID: 293 Comm: systemd-modules Not tainted 6.8.0-rc3_net_next_841a9b5 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x8a/0xa0 lockdep_rcu_suspicious+0x154/0x1a0 mlx5e_selq_apply+0x94/0xa0 [mlx5_core] mlx5e_selq_cleanup+0x3a/0x60 [mlx5_core] mlx5e_priv_init+0x2be/0x2f0 [mlx5_core] mlx5_rdma_setup_rn+0x7c/0x1a0 [mlx5_core] rdma_init_netdev+0x4e/0x80 [ib_core] ? mlx5_rdma_netdev_free+0x70/0x70 [mlx5_core] ipoib_intf_init+0x64/0x550 [ib_ipoib] ipoib_intf_alloc+0x4e/0xc0 [ib_ipoib] ipoib_add_one+0xb0/0x360 [ib_ipoib] add_client_context+0x112/0x1c0 [ib_core] ib_register_client+0x166/0x1b0 [ib_core] ? 0xffffffffa0573000 ipoib_init_module+0xeb/0x1a0 [ib_ipoib] do_one_initcall+0x61/0x250 do_init_module+0x8a/0x270 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x17d/0x230 __x64_sys_finit_module+0x61/0xb0 do_syscall_64+0x71/0x140 entry_SYSCALL_64_after_hwframe+0x46/0x4e Fixes: 8bf30be75069 ("net/mlx5e: Introduce select queue parameters") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240409190820.227554-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/selq.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c index f675b1926340f..f66bbc8464645 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) void mlx5e_selq_cleanup(struct mlx5e_selq *selq) { + mutex_lock(selq->state_lock); WARN_ON_ONCE(selq->is_prepared); kvfree(selq->standby); @@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq) kvfree(selq->standby); selq->standby = NULL; + mutex_unlock(selq->state_lock); } void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9910a0480f589..e7d396434da36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5578,9 +5578,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) kfree(priv->tx_rates); kfree(priv->txq2sq); destroy_workqueue(priv->wq); - mutex_lock(&priv->state_lock); mlx5e_selq_cleanup(&priv->selq); - mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask); for (i = 0; i < priv->htb_max_qos_sqs; i++) -- GitLab From 88a50c8a504823773373dd116c43ba0b93639790 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 9 Apr 2024 22:08:16 +0300 Subject: [PATCH 1709/2290] net/mlx5e: HTB, Fix inconsistencies with QoS SQs number [ Upstream commit 2f436f1869771d46e1a9f85738d5a1a7c5653a4e ] When creating a new HTB class while the interface is down, the variable that follows the number of QoS SQs (htb_max_qos_sqs) may not be consistent with the number of HTB classes. Previously, we compared these two values to ensure that the node_qid is lower than the number of QoS SQs, and we allocated stats for that SQ when they are equal. Change the check to compare the node_qid with the current number of leaf nodes and fix the checking conditions to ensure allocation of stats_list and stats for each node. Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240409190820.227554-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en/qos.c | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 2842195ee548a..1e887d640cffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -82,24 +82,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, txq_ix = mlx5e_qid_from_qos(chs, node_qid); - WARN_ON(node_qid > priv->htb_max_qos_sqs); - if (node_qid == priv->htb_max_qos_sqs) { - struct mlx5e_sq_stats *stats, **stats_list = NULL; - - if (priv->htb_max_qos_sqs == 0) { - stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), - sizeof(*stats_list), - GFP_KERNEL); - if (!stats_list) - return -ENOMEM; - } + WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb)); + if (!priv->htb_qos_sq_stats) { + struct mlx5e_sq_stats **stats_list; + + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + + WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + } + + if (!priv->htb_qos_sq_stats[node_qid]) { + struct mlx5e_sq_stats *stats; + stats = kzalloc(sizeof(*stats), GFP_KERNEL); - if (!stats) { - kvfree(stats_list); + if (!stats) return -ENOMEM; - } - if (stats_list) - WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats); /* Order htb_max_qos_sqs increment after writing the array pointer. * Pairs with smp_load_acquire in en_stats.c. -- GitLab From 8edb087c44a43d2404276e9efc274f007da70166 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Tue, 9 Apr 2024 12:41:59 +0200 Subject: [PATCH 1710/2290] net: sparx5: fix wrong config being used when reconfiguring PCS [ Upstream commit 33623113a48ea906f1955cbf71094f6aa4462e8f ] The wrong port config is being used if the PCS is reconfigured. Fix this by correctly using the new config instead of the old one. Fixes: 946e7fd5053a ("net: sparx5: add port module support") Signed-off-by: Daniel Machon Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409-link-mode-reconfiguration-fix-v2-1-db6a507f3627@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 32709d21ab2f9..212bf6f4ed72d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -730,7 +730,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5, bool sgmii = false, inband_aneg = false; int err; - if (port->conf.inband) { + if (conf->inband) { if (conf->portmode == PHY_INTERFACE_MODE_SGMII || conf->portmode == PHY_INTERFACE_MODE_QSGMII) inband_aneg = true; /* Cisco-SGMII in-band-aneg */ @@ -947,7 +947,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5, if (err) return -EINVAL; - if (port->conf.inband) { + if (conf->inband) { /* Enable/disable 1G counters in ASM */ spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev), ASM_PORT_CFG_CSC_STAT_DIS, -- GitLab From 19643bf8c9b5bb5eea5163bf2f6a3eee6fb5b99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Tue, 9 Apr 2024 18:01:14 +0300 Subject: [PATCH 1711/2290] net: dsa: mt7530: trap link-local frames regardless of ST Port State MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 17c560113231ddc20088553c7b499b289b664311 ] In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) of the Open Systems Interconnection basic reference model (OSI/RM) are described; the medium access control (MAC) and logical link control (LLC) sublayers. The MAC sublayer is the one facing the physical layer. In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A Bridge component comprises a MAC Relay Entity for interconnecting the Ports of the Bridge, at least two Ports, and higher layer entities with at least a Spanning Tree Protocol Entity included. Each Bridge Port also functions as an end station and shall provide the MAC Service to an LLC Entity. Each instance of the MAC Service is provided to a distinct LLC Entity that supports protocol identification, multiplexing, and demultiplexing, for protocol data unit (PDU) transmission and reception by one or more higher layer entities. It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC Entity associated with each Bridge Port is modeled as being directly connected to the attached Local Area Network (LAN). On the switch with CPU port architecture, CPU port functions as Management Port, and the Management Port functionality is provided by software which functions as an end station. Software is connected to an IEEE 802 LAN that is wholly contained within the system that incorporates the Bridge. Software provides access to the LLC Entity associated with each Bridge Port by the value of the source port field on the special tag on the frame received by software. We call frames that carry control information to determine the active topology and current extent of each Virtual Local Area Network (VLAN), i.e., spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration Protocol Data Units (MVRPDUs), and frames from other link constrained protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and Link Layer Discovery Protocol (LLDP), link-local frames. They are not forwarded by a Bridge. Permanently configured entries in the filtering database (FDB) ensure that such frames are discarded by the Forwarding Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: Each of the reserved MAC addresses specified in Table 8-1 (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be permanently configured in the FDB in C-VLAN components and ERs. Each of the reserved MAC addresses specified in Table 8-2 (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently configured in the FDB in S-VLAN components. Each of the reserved MAC addresses specified in Table 8-3 (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in TPMR components. The FDB entries for reserved MAC addresses shall specify filtering for all Bridge Ports and all VIDs. Management shall not provide the capability to modify or remove entries for reserved MAC addresses. The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of propagation of PDUs within a Bridged Network, as follows: The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. PDUs transmitted using this destination address, or any other addresses that appear in Table 8-1, Table 8-2, and Table 8-3 (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can therefore travel no further than those stations that can be reached via a single individual LAN from the originating station. The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an address that no conformant S-VLAN component, C-VLAN component, or MAC Bridge can forward; however, this address is relayed by a TPMR component. PDUs using this destination address, or any of the other addresses that appear in both Table 8-1 and Table 8-2 but not in Table 8-3 (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by any TPMRs but will propagate no further than the nearest S-VLAN component, C-VLAN component, or MAC Bridge. The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address that no conformant C-VLAN component, MAC Bridge can forward; however, it is relayed by TPMR components and S-VLAN components. PDUs using this destination address, or any of the other addresses that appear in Table 8-1 but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), will be relayed by TPMR components and S-VLAN components but will propagate no further than the nearest C-VLAN component or MAC Bridge. Because the LLC Entity associated with each Bridge Port is provided via CPU port, we must not filter these frames but forward them to CPU port. In a Bridge, the transmission Port is majorly decided by ingress and egress rules, FDB, and spanning tree Port State functions of the Forwarding Process. For link-local frames, only CPU port should be designated as destination port in the FDB, and the other functions of the Forwarding Process must not interfere with the decision of the transmission Port. We call this process trapping frames to CPU port. Therefore, on the switch with CPU port architecture, link-local frames must be trapped to CPU port, and certain link-local frames received by a Port of a Bridge comprising a TPMR component or an S-VLAN component must be excluded from it. A Bridge of the switch with CPU port architecture cannot comprise a Two-Port MAC Relay (TPMR) component as a TPMR component supports only a subset of the functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port doesn't count) of this architecture will either function as a standard MAC Bridge or a standard VLAN Bridge. Therefore, a Bridge of this architecture can only comprise S-VLAN components, C-VLAN components, or MAC Bridge components. Since there's no TPMR component, we don't need to relay PDUs using the destination addresses specified on the Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge section where they must be relayed by TPMR components. One option to trap link-local frames to CPU port is to add static FDB entries with CPU port designated as destination port. However, because that Independent VLAN Learning (IVL) is being used on every VID, each entry only applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC Bridge component or a C-VLAN component, there would have to be 16 times 4096 entries. This switch intellectual property can only hold a maximum of 2048 entries. Using this option, there also isn't a mechanism to prevent link-local frames from being discarded when the spanning tree Port State of the reception Port is discarding. The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 registers. Whilst this applies to every VID, it doesn't contain all of the reserved MAC addresses without affecting the remaining Standard Group MAC Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF destination addresses which may be relayed by MAC Bridges or VLAN Bridges. The latter option provides better but not complete conformance. This switch intellectual property also does not provide a mechanism to trap link-local frames with specific destination addresses to CPU port by Bridge, to conform to the filtering rules for the distinct Bridge components. Therefore, regardless of the type of the Bridge component, link-local frames with these destination addresses will be trapped to CPU port: 01-80-C2-00-00-[00,01,02,03,0E] In a Bridge comprising a MAC Bridge component or a C-VLAN component: Link-local frames with these destination addresses won't be trapped to CPU port which won't conform to IEEE Std 802.1Q-2022: 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] In a Bridge comprising an S-VLAN component: Link-local frames with these destination addresses will be trapped to CPU port which won't conform to IEEE Std 802.1Q-2022: 01-80-C2-00-00-00 Link-local frames with these destination addresses won't be trapped to CPU port which won't conform to IEEE Std 802.1Q-2022: 01-80-C2-00-00-[04,05,06,07,08,09,0A] Currently on this switch intellectual property, if the spanning tree Port State of the reception Port is discarding, link-local frames will be discarded. To trap link-local frames regardless of the spanning tree Port State, make the switch regard them as Bridge Protocol Data Units (BPDUs). This switch intellectual property only lets the frames regarded as BPDUs bypass the spanning tree Port State function of the Forwarding Process. With this change, the only remaining interference is the ingress rules. When the reception Port has no PVID assigned on software, VLAN-untagged frames won't be allowed in. There doesn't seem to be a mechanism on the switch intellectual property to have link-local frames bypass this function of the Forwarding Process. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Reviewed-by: Daniel Golle Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20240409-b4-for-net-mt7530-fix-link-local-when-stp-discarding-v2-1-07b1150164ac@arinc9.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 229 +++++++++++++++++++++++++++++++++------ drivers/net/dsa/mt7530.h | 5 + 2 files changed, 200 insertions(+), 34 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 07065c1af55e4..d4515c19a5f34 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -998,20 +998,173 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } -/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std - * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA - * must only be propagated to C-VLAN and MAC Bridge components. That means - * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, - * these frames are supposed to be processed by the CPU (software). So we make - * the switch only forward them to the CPU port. And if received from a CPU - * port, forward to a single port. The software is responsible of making the - * switch conform to the latter by setting a single port as destination port on - * the special tag. +/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) + * of the Open Systems Interconnection basic reference model (OSI/RM) are + * described; the medium access control (MAC) and logical link control (LLC) + * sublayers. The MAC sublayer is the one facing the physical layer. * - * This switch intellectual property cannot conform to this part of the standard - * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC - * DAs, it also includes :22-FF which the scope of propagation is not supposed - * to be restricted for these MAC DAs. + * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A + * Bridge component comprises a MAC Relay Entity for interconnecting the Ports + * of the Bridge, at least two Ports, and higher layer entities with at least a + * Spanning Tree Protocol Entity included. + * + * Each Bridge Port also functions as an end station and shall provide the MAC + * Service to an LLC Entity. Each instance of the MAC Service is provided to a + * distinct LLC Entity that supports protocol identification, multiplexing, and + * demultiplexing, for protocol data unit (PDU) transmission and reception by + * one or more higher layer entities. + * + * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC + * Entity associated with each Bridge Port is modeled as being directly + * connected to the attached Local Area Network (LAN). + * + * On the switch with CPU port architecture, CPU port functions as Management + * Port, and the Management Port functionality is provided by software which + * functions as an end station. Software is connected to an IEEE 802 LAN that is + * wholly contained within the system that incorporates the Bridge. Software + * provides access to the LLC Entity associated with each Bridge Port by the + * value of the source port field on the special tag on the frame received by + * software. + * + * We call frames that carry control information to determine the active + * topology and current extent of each Virtual Local Area Network (VLAN), i.e., + * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration + * Protocol Data Units (MVRPDUs), and frames from other link constrained + * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and + * Link Layer Discovery Protocol (LLDP), link-local frames. They are not + * forwarded by a Bridge. Permanently configured entries in the filtering + * database (FDB) ensure that such frames are discarded by the Forwarding + * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: + * + * Each of the reserved MAC addresses specified in Table 8-1 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be + * permanently configured in the FDB in C-VLAN components and ERs. + * + * Each of the reserved MAC addresses specified in Table 8-2 + * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently + * configured in the FDB in S-VLAN components. + * + * Each of the reserved MAC addresses specified in Table 8-3 + * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in + * TPMR components. + * + * The FDB entries for reserved MAC addresses shall specify filtering for all + * Bridge Ports and all VIDs. Management shall not provide the capability to + * modify or remove entries for reserved MAC addresses. + * + * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of + * propagation of PDUs within a Bridged Network, as follows: + * + * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no + * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) + * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. + * PDUs transmitted using this destination address, or any other addresses + * that appear in Table 8-1, Table 8-2, and Table 8-3 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can + * therefore travel no further than those stations that can be reached via a + * single individual LAN from the originating station. + * + * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an + * address that no conformant S-VLAN component, C-VLAN component, or MAC + * Bridge can forward; however, this address is relayed by a TPMR component. + * PDUs using this destination address, or any of the other addresses that + * appear in both Table 8-1 and Table 8-2 but not in Table 8-3 + * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by + * any TPMRs but will propagate no further than the nearest S-VLAN component, + * C-VLAN component, or MAC Bridge. + * + * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address + * that no conformant C-VLAN component, MAC Bridge can forward; however, it is + * relayed by TPMR components and S-VLAN components. PDUs using this + * destination address, or any of the other addresses that appear in Table 8-1 + * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), + * will be relayed by TPMR components and S-VLAN components but will propagate + * no further than the nearest C-VLAN component or MAC Bridge. + * + * Because the LLC Entity associated with each Bridge Port is provided via CPU + * port, we must not filter these frames but forward them to CPU port. + * + * In a Bridge, the transmission Port is majorly decided by ingress and egress + * rules, FDB, and spanning tree Port State functions of the Forwarding Process. + * For link-local frames, only CPU port should be designated as destination port + * in the FDB, and the other functions of the Forwarding Process must not + * interfere with the decision of the transmission Port. We call this process + * trapping frames to CPU port. + * + * Therefore, on the switch with CPU port architecture, link-local frames must + * be trapped to CPU port, and certain link-local frames received by a Port of a + * Bridge comprising a TPMR component or an S-VLAN component must be excluded + * from it. + * + * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port + * MAC Relay (TPMR) component as a TPMR component supports only a subset of the + * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port + * doesn't count) of this architecture will either function as a standard MAC + * Bridge or a standard VLAN Bridge. + * + * Therefore, a Bridge of this architecture can only comprise S-VLAN components, + * C-VLAN components, or MAC Bridge components. Since there's no TPMR component, + * we don't need to relay PDUs using the destination addresses specified on the + * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge + * section where they must be relayed by TPMR components. + * + * One option to trap link-local frames to CPU port is to add static FDB entries + * with CPU port designated as destination port. However, because that + * Independent VLAN Learning (IVL) is being used on every VID, each entry only + * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC + * Bridge component or a C-VLAN component, there would have to be 16 times 4096 + * entries. This switch intellectual property can only hold a maximum of 2048 + * entries. Using this option, there also isn't a mechanism to prevent + * link-local frames from being discarded when the spanning tree Port State of + * the reception Port is discarding. + * + * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 + * registers. Whilst this applies to every VID, it doesn't contain all of the + * reserved MAC addresses without affecting the remaining Standard Group MAC + * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the + * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination + * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF + * destination addresses which may be relayed by MAC Bridges or VLAN Bridges. + * The latter option provides better but not complete conformance. + * + * This switch intellectual property also does not provide a mechanism to trap + * link-local frames with specific destination addresses to CPU port by Bridge, + * to conform to the filtering rules for the distinct Bridge components. + * + * Therefore, regardless of the type of the Bridge component, link-local frames + * with these destination addresses will be trapped to CPU port: + * + * 01-80-C2-00-00-[00,01,02,03,0E] + * + * In a Bridge comprising a MAC Bridge component or a C-VLAN component: + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] + * + * In a Bridge comprising an S-VLAN component: + * + * Link-local frames with these destination addresses will be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-00 + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A] + * + * To trap link-local frames to CPU port as conformant as this switch + * intellectual property can allow, link-local frames are made to be regarded as + * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual + * property only lets the frames regarded as BPDUs bypass the spanning tree Port + * State function of the Forwarding Process. + * + * The only remaining interference is the ingress rules. When the reception Port + * has no PVID assigned on software, VLAN-untagged frames won't be allowed in. + * There doesn't seem to be a mechanism on the switch intellectual property to + * have link-local frames bypass this function of the Forwarding Process. */ static void mt753x_trap_frames(struct mt7530_priv *priv) @@ -1019,35 +1172,43 @@ mt753x_trap_frames(struct mt7530_priv *priv) /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them * VLAN-untagged. */ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | - MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | - MT753X_BPDU_PORT_FW_MASK, - MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_BPC, + MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_BPDU_FR | + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | - MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | - MT753X_R01_PORT_FW_MASK, - MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC1, + MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK, + MT753X_R02_BPDU_FR | + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | - MT753X_R03_PORT_FW_MASK, - MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC2, + MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK, + MT753X_R0E_BPDU_FR | + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index fa2afa67ceb07..2d1ea390f05ab 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -63,6 +63,7 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 +#define MT753X_PAE_BPDU_FR BIT(25) #define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) #define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) @@ -73,20 +74,24 @@ enum mt753x_id { /* Register for :01 and :02 MAC DA frame control */ #define MT753X_RGAC1 0x28 +#define MT753X_R02_BPDU_FR BIT(25) #define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) #define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_BPDU_FR BIT(9) #define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) #define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_BPDU_FR BIT(25) #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_BPDU_FR BIT(9) #define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) #define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) -- GitLab From fb6d14e23d489e338d4ddef1f013d5599e7722de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: [PATCH 1712/2290] af_unix: Do not use atomic ops for unix_sk(sk)->inflight. [ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ] When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Signed-off-by: Sasha Levin --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 17 ++++++++--------- net/unix/scm.c | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0920b669b9b31..16d6936baa2fb 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -54,7 +54,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 373530303ad19..0a75d76535f75 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -968,11 +968,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9bfffe2a7f020..7b326582d97da 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -237,14 +238,12 @@ void unix_gc(void) */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; - long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); @@ -271,7 +270,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, ¬_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index d1048b4c2baaf..4eff7da9f6f96 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -52,12 +52,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -74,10 +75,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); -- GitLab From b75722be422c276b699200de90527d01c602ea7c Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 9 Apr 2024 22:09:39 +0200 Subject: [PATCH 1713/2290] af_unix: Fix garbage collector racing against connect() [ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ] Garbage collector does not take into account the risk of embryo getting enqueued during the garbage collection. If such embryo has a peer that carries SCM_RIGHTS, two consecutive passes of scan_children() may see a different set of children. Leading to an incorrectly elevated inflight count, and then a dangling pointer within the gc_inflight_list. sockets are AF_UNIX/SOCK_STREAM S is an unconnected socket L is a listening in-flight socket bound to addr, not in fdtable V's fd will be passed via sendmsg(), gets inflight count bumped connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc() ---------------- ------------------------- ----------- NS = unix_create1() skb1 = sock_wmalloc(NS) L = unix_find_other(addr) unix_state_lock(L) unix_peer(S) = NS // V count=1 inflight=0 NS = unix_peer(S) skb2 = sock_alloc() skb_queue_tail(NS, skb2[V]) // V became in-flight // V count=2 inflight=1 close(V) // V count=1 inflight=1 // GC candidate condition met for u in gc_inflight_list: if (total_refs == inflight_refs) add u to gc_candidates // gc_candidates={L, V} for u in gc_candidates: scan_children(u, dec_inflight) // embryo (skb1) was not // reachable from L yet, so V's // inflight remains unchanged __skb_queue_tail(L, skb1) unix_state_unlock(L) for u in gc_candidates: if (u.inflight) scan_children(u, inc_inflight_move_tail) // V count=1 inflight=2 (!) If there is a GC-candidate listening socket, lock/unlock its state. This makes GC wait until the end of any ongoing connect() to that socket. After flipping the lock, a possibly SCM-laden embryo is already enqueued. And if there is another embryo coming, it can not possibly carry SCM_RIGHTS. At this point, unix_inflight() can not happen because unix_gc_lock is already taken. Inflight graph remains unaffected. Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.") Signed-off-by: Michal Luczaj Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/garbage.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 7b326582d97da..85c6f05c0fa3c 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -235,11 +235,22 @@ void unix_gc(void) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - total_refs = file_count(u->sk.sk_socket->file); + total_refs = file_count(sk->sk_socket->file); BUG_ON(!u->inflight); BUG_ON(total_refs < u->inflight); @@ -247,6 +258,11 @@ void unix_gc(void) list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock(sk); + unix_state_unlock(sk); + } } } -- GitLab From 4dea83d483d574645763440aaf50f186a17bfbd6 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Wed, 10 Apr 2024 09:13:55 +0000 Subject: [PATCH 1714/2290] net: ena: Fix potential sign extension issue [ Upstream commit 713a85195aad25d8a26786a37b674e3e5ec09e3c ] Small unsigned types are promoted to larger signed types in the case of multiplication, the result of which may overflow. In case the result of such a multiplication has its MSB turned on, it will be sign extended with '1's. This changes the multiplication result. Code example of the phenomenon: ------------------------------- u16 x, y; size_t z1, z2; x = y = 0xffff; printk("x=%x y=%x\n",x,y); z1 = x*y; z2 = (size_t)x*y; printk("z1=%lx z2=%lx\n", z1, z2); Output: ------- x=ffff y=ffff z1=fffffffffffe0001 z2=fffe0001 The expected result of ffff*ffff is fffe0001, and without the explicit casting to avoid the unwanted sign extension we got fffffffffffe0001. This commit adds an explicit casting to avoid the sign extension issue. Fixes: 689b2bdaaa14 ("net: ena: add functions for handling Low Latency Queues in ena_com") Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Reviewed-by: Shannon Nelson Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 633b321d7fdd9..4db689372980e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -362,7 +362,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; io_sq->bounce_buf_ctrl.next_to_use = 0; - size = io_sq->bounce_buf_ctrl.buffer_size * + size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * io_sq->bounce_buf_ctrl.buffers_num; dev_node = dev_to_node(ena_dev->dmadev); -- GitLab From 7d44e12efb7d777484feea2818d99d68224b3180 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Wed, 10 Apr 2024 09:13:56 +0000 Subject: [PATCH 1715/2290] net: ena: Wrong missing IO completions check order [ Upstream commit f7e417180665234fdb7af2ebe33d89aaa434d16f ] Missing IO completions check is called every second (HZ jiffies). This commit fixes several issues with this check: 1. Duplicate queues check: Max of 4 queues are scanned on each check due to monitor budget. Once reaching the budget, this check exits under the assumption that the next check will continue to scan the remainder of the queues, but in practice, next check will first scan the last already scanned queue which is not necessary and may cause the full queue scan to last a couple of seconds longer. The fix is to start every check with the next queue to scan. For example, on 8 IO queues: Bug: [0,1,2,3], [3,4,5,6], [6,7] Fix: [0,1,2,3], [4,5,6,7] 2. Unbalanced queues check: In case the number of active IO queues is not a multiple of budget, there will be checks which don't utilize the full budget because the full scan exits when reaching the last queue id. The fix is to run every TX completion check with exact queue budget regardless of the queue id. For example, on 7 IO queues: Bug: [0,1,2,3], [4,5,6], [0,1,2,3] Fix: [0,1,2,3], [4,5,6,0], [1,2,3,4] The budget may be lowered in case the number of IO queues is less than the budget (4) to make sure there are no duplicate queues on the same check. For example, on 3 IO queues: Bug: [0,1,2,0], [1,2,0,1] Fix: [0,1,2], [0,1,2] Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Amit Bernstein Signed-off-by: David Arinzon Reviewed-by: Shannon Nelson Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 9e82e7b9c3b72..b2eb6e1958f04 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3797,10 +3797,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_ring *rx_ring; - int i, budget, rc; + int qid, budget, rc; int io_queue_count; io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; + /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -3813,27 +3814,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) return; - budget = ENA_MONITORED_TX_QUEUES; + budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES); - for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { - tx_ring = &adapter->tx_ring[i]; - rx_ring = &adapter->rx_ring[i]; + qid = adapter->last_monitored_tx_qid; + + while (budget) { + qid = (qid + 1) % io_queue_count; + + tx_ring = &adapter->tx_ring[qid]; + rx_ring = &adapter->rx_ring[qid]; rc = check_missing_comp_in_tx_queue(adapter, tx_ring); if (unlikely(rc)) return; - rc = !ENA_IS_XDP_INDEX(adapter, i) ? + rc = !ENA_IS_XDP_INDEX(adapter, qid) ? check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; budget--; - if (!budget) - break; } - adapter->last_monitored_tx_qid = i % io_queue_count; + adapter->last_monitored_tx_qid = qid; } /* trigger napi schedule after 2 consecutive detections */ -- GitLab From 19ff8fed3338898b70b2aad831386c78564912e1 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Wed, 10 Apr 2024 09:13:57 +0000 Subject: [PATCH 1716/2290] net: ena: Fix incorrect descriptor free behavior [ Upstream commit bf02d9fe00632d22fa91d34749c7aacf397b6cde ] ENA has two types of TX queues: - queues which only process TX packets arriving from the network stack - queues which only process TX packets forwarded to it by XDP_REDIRECT or XDP_TX instructions The ena_free_tx_bufs() cycles through all descriptors in a TX queue and unmaps + frees every descriptor that hasn't been acknowledged yet by the device (uncompleted TX transactions). The function assumes that the processed TX queue is necessarily from the first category listed above and ends up using napi_consume_skb() for descriptors belonging to an XDP specific queue. This patch solves a bug in which, in case of a VF reset, the descriptors aren't freed correctly, leading to crashes. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Reviewed-by: Shannon Nelson Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b2eb6e1958f04..5e37b18ac3adf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1203,8 +1203,11 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, static void ena_free_tx_bufs(struct ena_ring *tx_ring) { bool print_once = true; + bool is_xdp_ring; u32 i; + is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid); + for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -1224,10 +1227,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) ena_unmap_tx_buff(tx_ring, tx_info); - dev_kfree_skb_any(tx_info->skb); + if (is_xdp_ring) + xdp_return_frame(tx_info->xdpf); + else + dev_kfree_skb_any(tx_info->skb); } - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->qid)); + + if (!is_xdp_ring) + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); } static void ena_free_all_tx_bufs(struct ena_adapter *adapter) -- GitLab From 91580ea48b6dc32c236eecbdcb64ecd20fea3f6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:24 +0200 Subject: [PATCH 1717/2290] tracing: hide unused ftrace_event_id_fops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5281ec83454d70d98b71f1836fb16512566c01cd ] When CONFIG_PERF_EVENTS, a 'make W=1' build produces a warning about the unused ftrace_event_id_fops variable: kernel/trace/trace_events.c:2155:37: error: 'ftrace_event_id_fops' defined but not used [-Werror=unused-const-variable=] 2155 | static const struct file_operations ftrace_event_id_fops = { Hide this in the same #ifdef as the reference to it. Link: https://lore.kernel.org/linux-trace-kernel/20240403080702.3509288-7-arnd@kernel.org Cc: Masami Hiramatsu Cc: Oleg Nesterov Cc: Mathieu Desnoyers Cc: Zheng Yejian Cc: Kees Cook Cc: Ajay Kaher Cc: Jinjie Ruan Cc: Clément Léger Cc: Dan Carpenter Cc: "Tzvetomir Stoyanov (VMware)" Fixes: 620a30e97feb ("tracing: Don't pass file_operations array to event_create_dir()") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_events.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a6d2f99f847d3..24859d9645050 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1669,6 +1669,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1683,6 +1684,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } +#endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, @@ -2127,10 +2129,12 @@ static const struct file_operations ftrace_event_format_fops = { .release = seq_release, }; +#ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; +#endif static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_file_tr, -- GitLab From 5f1205b86bd0419beb246156945b9e6ac3b43afa Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 11 Apr 2024 11:07:43 +0800 Subject: [PATCH 1718/2290] iommu/vt-d: Allocate local memory for page request queue [ Upstream commit a34f3e20ddff02c4f12df2c0635367394e64c63d ] The page request queue is per IOMMU, its allocation should be made NUMA-aware for performance reasons. Fixes: a222a7f0bb6c ("iommu/vt-d: Implement page request handling") Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240403214007.985600-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 03b25358946c4..cb862ab96873e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -71,7 +71,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); -- GitLab From cb3131b5a204dc8882dff3abe99f8618292a324b Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 19 Mar 2024 10:54:22 -0700 Subject: [PATCH 1719/2290] btrfs: qgroup: correctly model root qgroup rsv in convert commit 141fb8cd206ace23c02cd2791c6da52c1d77d42a upstream. We use add_root_meta_rsv and sub_root_meta_rsv to track prealloc and pertrans reservations for subvolumes when quotas are enabled. The convert function does not properly increment pertrans after decrementing prealloc, so the count is not accurate. Note: we check that the fs is not read-only to mirror the logic in qgroup_convert_meta, which checks that before adding to the pertrans rsv. Fixes: 8287475a2055 ("btrfs: qgroup: Use root::qgroup_meta_rsv_* to record qgroup meta reserved space") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c14d4f70e84bd..80ca7b435b0d1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4154,6 +4154,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) BTRFS_QGROUP_RSV_META_PREALLOC); trace_qgroup_meta_convert(root, num_bytes); qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); + if (!sb_rdonly(fs_info->sb)) + add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* -- GitLab From 06fe99985427385b60fbb933cc6fb668b82b5453 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Mar 2024 10:14:24 -0700 Subject: [PATCH 1720/2290] btrfs: record delayed inode root in transaction commit 71537e35c324ea6fbd68377a4f26bb93a831ae35 upstream. When running delayed inode updates, we do not record the inode's root in the transaction, but we do allocate PREALLOC and thus converted PERTRANS space for it. To be sure we free that PERTRANS meta rsv, we must ensure that we record the root in the transaction. Fixes: 4f5427ccce5d ("btrfs: delayed-inode: Use new qgroup meta rsv for delayed inode and item") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c6426080cf0ad..1494ce990d298 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1115,6 +1115,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_record_root_in_trans(trans, node->root); + if (ret) + return ret; ret = btrfs_update_delayed_inode(trans, node->root, path, node); return ret; } -- GitLab From c00146b399a51b5ccd4a960aebbbcd88edff34b7 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Mar 2024 10:18:39 -0700 Subject: [PATCH 1721/2290] btrfs: qgroup: convert PREALLOC to PERTRANS after record_root_in_trans commit 211de93367304ab395357f8cb12568a4d1e20701 upstream. The transaction is only able to free PERTRANS reservations for a root once that root has been recorded with the TRANS tag on the roots radix tree. Therefore, until we are sure that this root will get tagged, it isn't safe to convert. Generally, this is not an issue as *some* transaction will likely tag the root before long and this reservation will get freed in that transaction, but technically it could stick around until unmount and result in a warning about leaked metadata reservation space. This path is most exercised by running the generic/269 fstest with CONFIG_BTRFS_DEBUG. Fixes: a6496849671a ("btrfs: fix start transaction qgroup rsv double free") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b172091f42612..5549c843f0d3f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -700,14 +700,6 @@ again: h->reloc_reserved = reloc_reserved; } - /* - * Now that we have found a transaction to be a part of, convert the - * qgroup reservation from prealloc to pertrans. A different transaction - * can't race in and free our pertrans out from under us. - */ - if (qgroup_reserved) - btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); - got_it: if (!current->journal_info) current->journal_info = h; @@ -741,8 +733,15 @@ got_it: * not just freed. */ btrfs_end_transaction(h); - return ERR_PTR(ret); + goto reserve_fail; } + /* + * Now that we have found a transaction to be a part of, convert the + * qgroup reservation from prealloc to pertrans. A different transaction + * can't race in and free our pertrans out from under us. + */ + if (qgroup_reserved) + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); return h; -- GitLab From 88dd8bb129fca723c036c27803072d369254e5bb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 8 Apr 2024 18:11:09 +0100 Subject: [PATCH 1722/2290] io_uring/net: restore msg_control on sendzc retry commit 4fe82aedeb8a8cb09bfa60f55ab57b5c10a74ac4 upstream. cac9e4418f4cb ("io_uring/net: save msghdr->msg_control for retries") reinstatiates msg_control before every __sys_sendmsg_sock(), since the function can overwrite the value in msghdr. We need to do same for zerocopy sendmsg. Cc: stable@vger.kernel.org Fixes: 493108d95f146 ("io_uring/net: zerocopy sendmsg") Link: https://github.com/axboe/liburing/issues/1067 Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cc1d5d9df0576fa66ddad4420d240a98a020b267.1712596179.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index b1b564c04d1e7..48404bd330017 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1229,6 +1229,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; + kmsg->msg.msg_control_user = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) -- GitLab From 62029bc9ff2c17a4e3a2478d83418ec575413808 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 10 Apr 2024 09:58:02 +0800 Subject: [PATCH 1723/2290] kprobes: Fix possible use-after-free issue on kprobe registration commit 325f3fb551f8cd672dbbfc4cf58b14f9ee3fc9e8 upstream. When unloading a module, its state is changing MODULE_STATE_LIVE -> MODULE_STATE_GOING -> MODULE_STATE_UNFORMED. Each change will take a time. `is_module_text_address()` and `__module_text_address()` works with MODULE_STATE_LIVE and MODULE_STATE_GOING. If we use `is_module_text_address()` and `__module_text_address()` separately, there is a chance that the first one is succeeded but the next one is failed because module->state becomes MODULE_STATE_UNFORMED between those operations. In `check_kprobe_address_safe()`, if the second `__module_text_address()` is failed, that is ignored because it expected a kernel_text address. But it may have failed simply because module->state has been changed to MODULE_STATE_UNFORMED. In this case, arm_kprobe() will try to modify non-exist module text address (use-after-free). To fix this problem, we should not use separated `is_module_text_address()` and `__module_text_address()`, but use only `__module_text_address()` once and do `try_module_get(module)` which is only available with MODULE_STATE_LIVE. Link: https://lore.kernel.org/all/20240410015802.265220-1-zhengyejian1@huawei.com/ Fixes: 28f6c37a2910 ("kprobes: Forbid probing on trampoline and BPF code areas") Cc: stable@vger.kernel.org Signed-off-by: Zheng Yejian Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index dbfddfa86c14e..5b5ee060a2db5 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p, jump_label_lock(); preempt_disable(); - /* Ensure it is not in reserved area nor out of text */ - if (!(core_kernel_text((unsigned long) p->addr) || - is_module_text_address((unsigned long) p->addr)) || - in_gate_area_no_mm((unsigned long) p->addr) || + /* Ensure the address is in a text area, and find a module if exists. */ + *probed_mod = NULL; + if (!core_kernel_text((unsigned long) p->addr)) { + *probed_mod = __module_text_address((unsigned long) p->addr); + if (!(*probed_mod)) { + ret = -EINVAL; + goto out; + } + } + /* Ensure it is not in reserved area. */ + if (in_gate_area_no_mm((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || @@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if 'p' is probing a module. */ - *probed_mod = __module_text_address((unsigned long) p->addr); + /* Get module refcount and reject __init functions for loaded modules. */ if (*probed_mod) { /* * We must hold a refcount of the probed module while updating -- GitLab From 4b53d7d620c45c60501fb81c00fd2eb07aeb142f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Apr 2024 00:34:29 +0300 Subject: [PATCH 1724/2290] drm/i915/vrr: Disable VRR when using bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dcd8992e47f13afb5c11a61e8d9c141c35e23751 upstream. All joined pipes share the same transcoder/timing generator. Currently we just do the commits per-pipe, which doesn't really work if we need to change switch between non-VRR and VRR timings generators on the fly, or even when sending the push to the transcoder. For now just disable VRR when bigjoiner is needed. Cc: stable@vger.kernel.org Tested-by: Vidya Srinivas Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20240404213441.17637-6-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit f9d5e51db65652dbd8a2102fd7619440e3599fd2) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_vrr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5eac99021875e..6615e4153f37a 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -110,6 +110,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, if (!intel_vrr_is_capable(connector)) return; + /* + * FIXME all joined pipes share the same transcoder. + * Need to account for that during VRR toggle/push/etc. + */ + if (crtc_state->bigjoiner_pipes) + return; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return; -- GitLab From d29b50a32c274ae660d5e55eda747220a34217ef Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 26 Mar 2024 15:32:46 -0400 Subject: [PATCH 1725/2290] drm/amdkfd: Reset GPU on queue preemption failure commit 8bdfb4ea95ca738d33ef71376c21eba20130f2eb upstream. Currently, with F32 HWS GPU reset is only when unmap queue fails. However, if compute queue doesn't repond to preemption request in time unmap will return without any error. In this case, only preemption error is logged and Reset is not triggered. Call GPU reset in this case also. Reviewed-by: Alex Deucher Signed-off-by: Harish Kasiviswanathan Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0b87034d9dd51..1b7b294264804 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1805,6 +1805,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); while (halt_if_hws_hang) schedule(); + kfd_hws_hang(dqm); return -ETIME; } -- GitLab From 8a6fea3fcb577a543ef67683ca7105bde49a38fb Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Wed, 3 Apr 2024 17:02:46 +0800 Subject: [PATCH 1726/2290] drm/ast: Fix soft lockup commit bc004f5038220b1891ef4107134ccae44be55109 upstream. There is a while-loop in ast_dp_set_on_off() that could lead to infinite-loop. This is because the register, VGACRI-Dx, checked in this API is a scratch register actually controlled by a MCU, named DPMCU, in BMC. These scratch registers are protected by scu-lock. If suc-lock is not off, DPMCU can not update these registers and then host will have soft lockup due to never updated status. DPMCU is used to control DP and relative registers to handshake with host's VGA driver. Even the most time-consuming task, DP's link training, is less than 100ms. 200ms should be enough. Signed-off-by: Jammy Huang Fixes: 594e9c04b586 ("drm/ast: Create the driver for ASPEED proprietory Display-Port") Reviewed-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Cc: KuoHsiang Chou Cc: Thomas Zimmermann Cc: Dave Airlie Cc: Jocelyn Falempe Cc: dri-devel@lists.freedesktop.org Cc: # v5.19+ Link: https://patchwork.freedesktop.org/patch/msgid/20240403090246.1495487-1-jammy_huang@aspeedtech.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 56483860306b4..a4a23b9623ad3 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -190,6 +190,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on) { struct ast_private *ast = to_ast_private(dev); u8 video_on_off = on; + u32 i = 0; // Video On/Off ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on); @@ -202,6 +203,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on) ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) { // wait 1 ms mdelay(1); + if (++i > 200) + break; } } } -- GitLab From 18c8cc6680ce938d0458859b6a08b4d34f7d8055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 4 Apr 2024 23:33:25 +0300 Subject: [PATCH 1727/2290] drm/client: Fully protect modes[] with dev->mode_config.mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3eadd887dbac1df8f25f701e5d404d1b90fd0fea upstream. The modes[] array contains pointers to modes on the connectors' mode lists, which are protected by dev->mode_config.mutex. Thus we need to extend modes[] the same protection or by the time we use it the elements may already be pointing to freed/reused memory. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10583 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240404203336.10454-2-ville.syrjala@linux.intel.com Reviewed-by: Dmitry Baryshkov Reviewed-by: Jani Nikula Reviewed-by: Thomas Zimmermann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_client_modeset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 7847020de0a49..9a65806047b5e 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -781,6 +781,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int total_modes_count = 0; struct drm_client_offset *offsets; unsigned int connector_count = 0; + /* points to modes protected by mode_config.mutex */ struct drm_display_mode **modes; struct drm_crtc **crtcs; int i, ret = 0; @@ -849,7 +850,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, drm_client_pick_crtcs(client, connectors, connector_count, crtcs, modes, 0, width, height); } - mutex_unlock(&dev->mode_config.mutex); drm_client_modeset_release(client); @@ -879,6 +879,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, modeset->y = offset->y; } } + mutex_unlock(&dev->mode_config.mutex); mutex_unlock(&client->modeset_mutex); out: -- GitLab From f6e2d61dc1598284eab80c8bec57523a4c088b88 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 28 Mar 2024 10:21:47 +1000 Subject: [PATCH 1728/2290] vhost: Add smp_rmb() in vhost_vq_avail_empty() commit 22e1992cf7b034db5325660e98c41ca5afa5f519 upstream. A smp_rmb() has been missed in vhost_vq_avail_empty(), spotted by Will. Otherwise, it's not ensured the available ring entries pushed by guest can be observed by vhost in time, leading to stale available ring entries fetched by vhost in vhost_get_vq_desc(), as reported by Yihuang Yu on NVidia's grace-hopper (ARM64) platform. /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ -accel kvm -machine virt,gic-version=host -cpu host \ -smp maxcpus=1,cpus=1,sockets=1,clusters=1,cores=1,threads=1 \ -m 4096M,slots=16,maxmem=64G \ -object memory-backend-ram,id=mem0,size=4096M \ : \ -netdev tap,id=vnet0,vhost=true \ -device virtio-net-pci,bus=pcie.8,netdev=vnet0,mac=52:54:00:f1:26:b0 : guest# netperf -H 10.26.1.81 -l 60 -C -c -t UDP_STREAM virtio_net virtio0: output.0:id 100 is not a head! Add the missed smp_rmb() in vhost_vq_avail_empty(). When tx_can_batch() returns true, it means there's still pending tx buffers. Since it might read indices, so it still can bypass the smp_rmb() in vhost_get_vq_desc(). Note that it should be safe until vq->avail_idx is changed by commit 275bf960ac697 ("vhost: better detection of available buffers"). Fixes: 275bf960ac69 ("vhost: better detection of available buffers") Cc: # v4.11+ Reported-by: Yihuang Yu Suggested-by: Will Deacon Signed-off-by: Gavin Shan Acked-by: Jason Wang Message-Id: <20240328002149.1141302-2-gshan@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 61c72e62abd49..c3f7a77d9fed3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2523,9 +2523,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) r = vhost_get_avail_idx(vq, &avail_idx); if (unlikely(r)) return false; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return false; + } - return vq->avail_idx == vq->last_avail_idx; + return true; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); -- GitLab From a2c1c0cfab05421320f65f5a2a60d607a47653a1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 28 Mar 2024 10:21:48 +1000 Subject: [PATCH 1729/2290] vhost: Add smp_rmb() in vhost_enable_notify() commit df9ace7647d4123209395bb9967e998d5758c645 upstream. A smp_rmb() has been missed in vhost_enable_notify(), inspired by Will. Otherwise, it's not ensured the available ring entries pushed by guest can be observed by vhost in time, leading to stale available ring entries fetched by vhost in vhost_get_vq_desc(), as reported by Yihuang Yu on NVidia's grace-hopper (ARM64) platform. /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ -accel kvm -machine virt,gic-version=host -cpu host \ -smp maxcpus=1,cpus=1,sockets=1,clusters=1,cores=1,threads=1 \ -m 4096M,slots=16,maxmem=64G \ -object memory-backend-ram,id=mem0,size=4096M \ : \ -netdev tap,id=vnet0,vhost=true \ -device virtio-net-pci,bus=pcie.8,netdev=vnet0,mac=52:54:00:f1:26:b0 : guest# netperf -H 10.26.1.81 -l 60 -C -c -t UDP_STREAM virtio_net virtio0: output.0:id 100 is not a head! Add the missed smp_rmb() in vhost_enable_notify(). When it returns true, it means there's still pending tx buffers. Since it might read indices, so it still can bypass the smp_rmb() in vhost_get_vq_desc(). Note that it should be safe until vq->avail_idx is changed by commit d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()"). Fixes: d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()") Cc: # v5.18+ Reported-by: Yihuang Yu Suggested-by: Will Deacon Signed-off-by: Gavin Shan Acked-by: Jason Wang Message-Id: <20240328002149.1141302-3-gshan@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c3f7a77d9fed3..1b00ed5ef1cfa 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2572,9 +2572,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->avail->idx, r); return false; } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return true; + } - return vq->avail_idx != vq->last_avail_idx; + return false; } EXPORT_SYMBOL_GPL(vhost_enable_notify); -- GitLab From 0c182182d6d9e5bcc2156faf5e137001ccd0e14b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 5 Mar 2024 22:10:03 -0800 Subject: [PATCH 1730/2290] perf/x86: Fix out of range data commit dec8ced871e17eea46f097542dd074d022be4bd1 upstream. On x86 each struct cpu_hw_events maintains a table for counter assignment but it missed to update one for the deleted event in x86_pmu_del(). This can make perf_clear_dirty_counters() reset used counter if it's called before event scheduling or enabling. Then it would return out of range data which doesn't make sense. The following code can reproduce the problem. $ cat repro.c #include #include #include #include #include #include #include #include struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .disabled = 1, }; void *worker(void *arg) { int cpu = (long)arg; int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0); int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0); void *p; do { ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0); ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); munmap(p, 4096); ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); } while (1); return NULL; } int main(void) { int i; int n = sysconf(_SC_NPROCESSORS_ONLN); pthread_t *th = calloc(n, sizeof(*th)); for (i = 0; i < n; i++) pthread_create(&th[i], NULL, worker, (void *)(long)i); for (i = 0; i < n; i++) pthread_join(th[i], NULL); free(th); return 0; } And you can see the out of range data using perf stat like this. Probably it'd be easier to see on a large machine. $ gcc -o repro repro.c -pthread $ ./repro & $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }' 1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%) 1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%) 1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%) 2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%) 2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%) 2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%) 2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%) 3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%) 3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%) 3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%) 3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle It should move the contents in the cpuc->assign as well. Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task") Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240306061003.1894224-1-namhyung@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 30fb4931d3871..1394312b732a3 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; -- GitLab From 9c09773917fbb77dff85b433e1e89123fc5fb530 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Apr 2024 10:51:05 -0700 Subject: [PATCH 1731/2290] x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f337a6a21e2fd67eadea471e93d05dd37baaa9be upstream. Initialize cpu_mitigations to CPU_MITIGATIONS_OFF if the kernel is built with CONFIG_SPECULATION_MITIGATIONS=n, as the help text quite clearly states that disabling SPECULATION_MITIGATIONS is supposed to turn off all mitigations by default. │ If you say N, all mitigations will be disabled. You really │ should know what you are doing to say so. As is, the kernel still defaults to CPU_MITIGATIONS_AUTO, which results in some mitigations being enabled in spite of SPECULATION_MITIGATIONS=n. Fixes: f43b9876e857 ("x86/retbleed: Add fine grained Kconfig knobs") Signed-off-by: Sean Christopherson Signed-off-by: Ingo Molnar Reviewed-by: Daniel Sneddon Cc: stable@vger.kernel.org Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240409175108.1512861-2-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index e6f0101941ed8..2c44dd12a158c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2788,7 +2788,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; + IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { -- GitLab From 881b495ed26beb0b8c9dcd7aca5343c56c082c1d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Apr 2024 16:26:30 -0700 Subject: [PATCH 1732/2290] selftests: timers: Fix abs() warning in posix_timers test commit ed366de8ec89d4f960d66c85fc37d9de22f7bf6d upstream. Building with clang results in the following warning: posix_timers.c:69:6: warning: absolute value function 'abs' given an argument of type 'long long' but has parameter of type 'int' which may cause truncation of value [-Wabsolute-value] if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { ^ So switch to using llabs() instead. Fixes: 0bc4b0cf1570 ("selftests: add basic posix timers selftests") Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240410232637.4135564-3-jstultz@google.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/timers/posix_timers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 0ba500056e635..193a984f512c3 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end) diff = end.tv_usec - start.tv_usec; diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; - if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } -- GitLab From 22f51ddb0cc120daf65be585e2df765fe54243ba Mon Sep 17 00:00:00 2001 From: Adam Dunlap Date: Mon, 18 Mar 2024 16:09:27 -0700 Subject: [PATCH 1733/2290] x86/apic: Force native_apic_mem_read() to use the MOV instruction commit 5ce344beaca688f4cdea07045e0b8f03dc537e74 upstream. When done from a virtual machine, instructions that touch APIC memory must be emulated. By convention, MMIO accesses are typically performed via io.h helpers such as readl() or writeq() to simplify instruction emulation/decoding (ex: in KVM hosts and SEV guests) [0]. Currently, native_apic_mem_read() does not follow this convention, allowing the compiler to emit instructions other than the MOV instruction generated by readl(). In particular, when the kernel is compiled with clang and run as a SEV-ES or SEV-SNP guest, the compiler would emit a TESTL instruction which is not supported by the SEV-ES emulator, causing a boot failure in that environment. It is likely the same problem would happen in a TDX guest as that uses the same instruction emulator as SEV-ES. To make sure all emulators can emulate APIC memory reads via MOV, use the readl() function in native_apic_mem_read(). It is expected that any emulator would support MOV in any addressing mode as it is the most generic and is what is usually emitted currently. The TESTL instruction is emitted when native_apic_mem_read() is inlined into apic_mem_wait_icr_idle(). The emulator comes from insn_decode_mmio() in arch/x86/lib/insn-eval.c. It's not worth it to extend insn_decode_mmio() to support more instructions since, in theory, the compiler could choose to output nearly any instruction for such reads which would bloat the emulator beyond reason. [0] https://lore.kernel.org/all/20220405232939.73860-12-kirill.shutemov@linux.intel.com/ [ bp: Massage commit message, fix typos. ] Signed-off-by: Adam Dunlap Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Reviewed-by: Ard Biesheuvel Tested-by: Kevin Loughlin Cc: Link: https://lore.kernel.org/r/20240318230927.2191933-1-acdunlap@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074bad..36ceecd40fd93 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -12,6 +12,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -109,7 +110,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } extern void native_apic_wait_icr_idle(void); -- GitLab From d447d8de840c26396272f648bfd9438e4bf4b2f5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2024 09:46:01 +0200 Subject: [PATCH 1734/2290] irqflags: Explicitly ignore lockdep_hrtimer_exit() argument commit c1d11fc2c8320871b40730991071dd0a0b405bc8 upstream. When building with 'make W=1' but CONFIG_TRACE_IRQFLAGS=n, the unused argument to lockdep_hrtimer_exit() causes a warning: kernel/time/hrtimer.c:1655:14: error: variable 'expires_in_hardirq' set but not used [-Werror=unused-but-set-variable] This is intentional behavior, so add a cast to void to shut up the warning. Fixes: 73d20564e0dc ("hrtimer: Don't dereference the hrtimer pointer after the callback") Reported-by: kernel test robot Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240408074609.3170807-1-arnd@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202311191229.55QXHVc6-lkp@intel.com/ Signed-off-by: Greg Kroah-Hartman --- include/linux/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 2b665c32f5fe6..2e09c269bf9d8 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -126,7 +126,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) -- GitLab From 0d433e40827d3a896725219f0df92f418ae28dab Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Tue, 9 Apr 2024 16:08:05 -0700 Subject: [PATCH 1735/2290] x86/bugs: Fix return type of spectre_bhi_state() commit 04f4230e2f86a4e961ea5466eda3db8c1762004d upstream. The definition of spectre_bhi_state() incorrectly returns a const char * const. This causes the a compiler warning when building with W=1: warning: type qualifiers ignored on function return type [-Wignored-qualifiers] 2812 | static const char * const spectre_bhi_state(void) Remove the const qualifier from the pointer. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Reported-by: Sean Christopherson Signed-off-by: Daniel Sneddon Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240409230806.1545822-1-daniel.sneddon@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 96bd3ee83a484..e2a357890df64 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2788,7 +2788,7 @@ static char *pbrsb_eibrs_state(void) } } -static const char * const spectre_bhi_state(void) +static const char *spectre_bhi_state(void) { if (!boot_cpu_has_bug(X86_BUG_BHI)) return "; BHI: Not affected"; -- GitLab From 662e341e57ccbd178da8c44f9a356175fc75fcbd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:45 -0700 Subject: [PATCH 1736/2290] x86/bugs: Fix BHI documentation commit dfe648903f42296866d79f10d03f8c85c9dfba30 upstream. Fix up some inaccuracies in the BHI documentation. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/8c84f7451bfe0dd08543c6082a383f390d4aa7e2.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 15 ++++++++------- Documentation/admin-guide/kernel-parameters.txt | 12 +++++++----- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 9edb2860a3e19..d4f2606340741 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -439,11 +439,11 @@ The possible values in this file are: - System is protected by retpoline * - BHI: BHI_DIS_S - System is protected by BHI_DIS_S - * - BHI: SW loop; KVM SW loop + * - BHI: SW loop, KVM SW loop - System is protected by software clearing sequence * - BHI: Syscall hardening - Syscalls are hardened against BHI - * - BHI: Syscall hardening; KVM: SW loop + * - BHI: Syscall hardening, KVM: SW loop - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU @@ -666,13 +666,14 @@ kernel command line. of the HW BHI control and the SW BHB clearing sequence. on - unconditionally enable. + (default) Enable the HW or SW mitigation as + needed. off - unconditionally disable. + Disable the mitigation. auto - enable if hardware mitigation - control(BHI_DIS_S) is available, otherwise - enable alternate mitigation in KVM. + Enable the HW mitigation if needed, but + *don't* enable the SW mitigation except for KVM. + The system may be vulnerable. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b2c7b2f012e90..1ae6ceff0dd39 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3283,6 +3283,7 @@ reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] + spectre_bhi=off [X86] spectre_v2_user=off [X86] srbds=off [X86,INTEL] ssbd=force-off [ARM64] @@ -5739,11 +5740,12 @@ deployment of the HW BHI control and the SW BHB clearing sequence. - on - unconditionally enable. - off - unconditionally disable. - auto - (default) enable hardware mitigation - (BHI_DIS_S) if available, otherwise enable - alternate mitigation in KVM. + on - (default) Enable the HW or SW mitigation + as needed. + off - Disable the mitigation. + auto - Enable the HW mitigation if needed, but + *don't* enable the SW mitigation except + for KVM. The system may be vulnerable. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. -- GitLab From b1b32586f797bbcde3e286c80ac3e450022e1591 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:46 -0700 Subject: [PATCH 1737/2290] x86/bugs: Cache the value of MSR_IA32_ARCH_CAPABILITIES commit cb2db5bb04d7f778fbc1a1ea2507aab436f1bff3 upstream. There's no need to keep reading MSR_IA32_ARCH_CAPABILITIES over and over. It's even read in the BHI sysfs function which is a big no-no. Just read it once and cache it. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/9592a18a814368e75f8f4b9d74d3883aa4fd1eaf.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e2a357890df64..dacd196b71aac 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,6 +60,8 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init ia32_cap; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; @@ -143,6 +145,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + ia32_cap = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -300,8 +304,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -340,7 +342,6 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); if ( (ia32_cap & ARCH_CAP_MDS_NO) && !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; @@ -400,8 +401,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -412,8 +411,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -507,7 +504,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -658,8 +655,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -668,7 +663,6 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; @@ -812,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(ia32_cap & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1884,8 +1878,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -2797,7 +2789,7 @@ static const char *spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + !(ia32_cap & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Syscall hardening, KVM: SW loop"; -- GitLab From dc2db3e978c5ae3895d28728c4e4a69d468a00cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Apr 2024 09:25:36 +0200 Subject: [PATCH 1738/2290] x86/bugs: Rename various 'ia32_cap' variables to 'x86_arch_cap_msr' commit d0485730d2189ffe5d986d4e9e191f1e4d5ffd24 upstream. So we are using the 'ia32_cap' value in a number of places, which got its name from MSR_IA32_ARCH_CAPABILITIES MSR register. But there's very little 'IA32' about it - this isn't 32-bit only code, nor does it originate from there, it's just a historic quirk that many Intel MSR names are prefixed with IA32_. This is already clear from the helper method around the MSR: x86_read_arch_cap_msr(), which doesn't have the IA32 prefix. So rename 'ia32_cap' to 'x86_arch_cap_msr' to be consistent with its role and with the naming of the helper function. Signed-off-by: Ingo Molnar Cc: Josh Poimboeuf Cc: Nikolay Borisov Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/9592a18a814368e75f8f4b9d74d3883aa4fd1eaf.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 6 ++--- arch/x86/kernel/cpu/bugs.c | 30 +++++++++++----------- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++------------------ 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7705571100518..e1672cc77c65f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1760,11 +1760,11 @@ static int x2apic_state; static bool x2apic_hw_locked(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; u64 msr; - ia32_cap = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); return (msr & LEGACY_XAPIC_DISABLED); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dacd196b71aac..fb2ab86d979a2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); -static u64 __ro_after_init ia32_cap; +static u64 __ro_after_init x86_arch_cap_msr; static DEFINE_MUTEX(spec_ctrl_mutex); @@ -145,7 +145,7 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } - ia32_cap = x86_read_arch_cap_msr(); + x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); @@ -342,8 +342,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -433,7 +433,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -443,10 +443,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -504,7 +504,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -663,7 +663,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -806,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(ia32_cap & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1518,14 +1518,14 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) return; - ia32_cap = x86_read_arch_cap_msr(); + x86_arch_cap_msr = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_RRSBA) { + if (x86_arch_cap_msr & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; update_spec_ctrl(x86_spec_ctrl_base); } @@ -1892,7 +1892,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2789,7 +2789,7 @@ static const char *spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(ia32_cap & ARCH_CAP_RRSBA)) + !(x86_arch_cap_msr & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Syscall hardening, KVM: SW loop"; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 08fe77d2a3f90..f2bc651c0dcd8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1308,25 +1308,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1334,7 +1334,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1343,11 +1343,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1359,7 +1359,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1367,15 +1367,15 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature * flag and protect from vendor-specific bugs via the whitelist. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1394,9 +1394,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1422,7 +1422,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1430,7 +1430,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1443,7 +1443,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); @@ -1452,11 +1452,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(ia32_cap & ARCH_CAP_BHI_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && !cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) @@ -1466,7 +1466,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); -- GitLab From 4b0b5d621e89e15e1fce6fcd7ef1b041cb94e405 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:47 -0700 Subject: [PATCH 1739/2290] x86/bugs: Fix BHI handling of RRSBA commit 1cea8a280dfd1016148a3820676f2f03e3f5b898 upstream. The ARCH_CAP_RRSBA check isn't correct: RRSBA may have already been disabled by the Spectre v2 mitigation (or can otherwise be disabled by the BHI mitigation itself if needed). In that case retpolines are fine. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/6f56f13da34a0834b69163467449be7f58f253dc.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fb2ab86d979a2..7e7b4cd5304cc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1515,20 +1515,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 x86_arch_cap_msr; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (x86_arch_cap_msr & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1629,9 +1634,11 @@ static void __init bhi_select_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) - return; + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } if (spec_ctrl_bhi_dis()) return; @@ -2788,8 +2795,7 @@ static const char *spectre_bhi_state(void) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; - else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(x86_arch_cap_msr & ARCH_CAP_RRSBA)) + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Syscall hardening, KVM: SW loop"; -- GitLab From d737d8cd8e64e34c429e1ac5d3502159aeed3446 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:48 -0700 Subject: [PATCH 1740/2290] x86/bugs: Clarify that syscall hardening isn't a BHI mitigation commit 5f882f3b0a8bf0788d5a0ee44b1191de5319bb8a upstream. While syscall hardening helps prevent some BHI attacks, there's still other low-hanging fruit remaining. Don't classify it as a mitigation and make it clear that the system may still be vulnerable if it doesn't have a HW or SW mitigation enabled. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/b5951dae3fdee7f1520d5136a27be3bdfe95f88b.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 11 +++++------ Documentation/admin-guide/kernel-parameters.txt | 3 +-- arch/x86/kernel/cpu/bugs.c | 6 +++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index d4f2606340741..081f289008987 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -441,10 +441,10 @@ The possible values in this file are: - System is protected by BHI_DIS_S * - BHI: SW loop, KVM SW loop - System is protected by software clearing sequence - * - BHI: Syscall hardening - - Syscalls are hardened against BHI - * - BHI: Syscall hardening, KVM: SW loop - - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence + * - BHI: Vulnerable + - System is vulnerable to BHI + * - BHI: Vulnerable, KVM: SW loop + - System is vulnerable; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will @@ -661,8 +661,7 @@ kernel command line. spectre_bhi= [X86] Control mitigation of Branch History Injection - (BHI) vulnerability. Syscalls are hardened against BHI - regardless of this setting. This setting affects the deployment + (BHI) vulnerability. This setting affects the deployment of the HW BHI control and the SW BHB clearing sequence. on diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1ae6ceff0dd39..e93ecb3886567 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5735,8 +5735,7 @@ See Documentation/admin-guide/laptops/sonypi.rst spectre_bhi= [X86] Control mitigation of Branch History Injection - (BHI) vulnerability. Syscalls are hardened against BHI - reglardless of this setting. This setting affects the + (BHI) vulnerability. This setting affects the deployment of the HW BHI control and the SW BHB clearing sequence. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7e7b4cd5304cc..d3e1f0614f09b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2797,10 +2797,10 @@ static const char *spectre_bhi_state(void) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) return "; BHI: Retpoline"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) - return "; BHI: Syscall hardening, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; - return "; BHI: Vulnerable (Syscall hardening enabled)"; + return "; BHI: Vulnerable"; } static ssize_t spectre_v2_show_state(char *buf) -- GitLab From 7f18a0df76217683500979dcb0a2faa7f3eccfa5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:50 -0700 Subject: [PATCH 1741/2290] x86/bugs: Remove CONFIG_BHI_MITIGATION_AUTO and spectre_bhi=auto commit 36d4fe147c870f6d3f6602befd7ef44393a1c87a upstream. Unlike most other mitigations' "auto" options, spectre_bhi=auto only mitigates newer systems, which is confusing and not particularly useful. Remove it. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Cc: Sean Christopherson Cc: Linus Torvalds Link: https://lore.kernel.org/r/412e9dc87971b622bbbaf64740ebc1f140bff343.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 4 ---- Documentation/admin-guide/kernel-parameters.txt | 3 --- arch/x86/Kconfig | 4 ---- arch/x86/kernel/cpu/bugs.c | 10 +--------- 4 files changed, 1 insertion(+), 20 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 081f289008987..e0a1be97fa759 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -669,10 +669,6 @@ kernel command line. needed. off Disable the mitigation. - auto - Enable the HW mitigation if needed, but - *don't* enable the SW mitigation except for KVM. - The system may be vulnerable. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e93ecb3886567..aebbe2981241a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5742,9 +5742,6 @@ on - (default) Enable the HW or SW mitigation as needed. off - Disable the mitigation. - auto - Enable the HW mitigation if needed, but - *don't* enable the SW mitigation except - for KVM. The system may be vulnerable. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ba815ac474a1b..be5b3a48c8cde 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2581,10 +2581,6 @@ config SPECTRE_BHI_OFF bool "off" help Equivalent to setting spectre_bhi=off command line parameter. -config SPECTRE_BHI_AUTO - bool "auto" - help - Equivalent to setting spectre_bhi=auto command line parameter. endchoice diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3e1f0614f09b..0b0fea179b0d7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1602,13 +1602,10 @@ static bool __init spec_ctrl_bhi_dis(void) enum bhi_mitigations { BHI_MITIGATION_OFF, BHI_MITIGATION_ON, - BHI_MITIGATION_AUTO, }; static enum bhi_mitigations bhi_mitigation __ro_after_init = - IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : - IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : - BHI_MITIGATION_AUTO; + IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; static int __init spectre_bhi_parse_cmdline(char *str) { @@ -1619,8 +1616,6 @@ static int __init spectre_bhi_parse_cmdline(char *str) bhi_mitigation = BHI_MITIGATION_OFF; else if (!strcmp(str, "on")) bhi_mitigation = BHI_MITIGATION_ON; - else if (!strcmp(str, "auto")) - bhi_mitigation = BHI_MITIGATION_AUTO; else pr_err("Ignoring unknown spectre_bhi option (%s)", str); @@ -1650,9 +1645,6 @@ static void __init bhi_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); - if (bhi_mitigation == BHI_MITIGATION_AUTO) - return; - /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); -- GitLab From d844df110084ef8bd950a52194865f3f63b561ca Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:51 -0700 Subject: [PATCH 1742/2290] x86/bugs: Replace CONFIG_SPECTRE_BHI_{ON,OFF} with CONFIG_MITIGATION_SPECTRE_BHI commit 4f511739c54b549061993b53fc0380f48dfca23b upstream. For consistency with the other CONFIG_MITIGATION_* options, replace the CONFIG_SPECTRE_BHI_{ON,OFF} options with a single CONFIG_MITIGATION_SPECTRE_BHI option. [ mingo: Fix ] Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Sean Christopherson Cc: Linus Torvalds Cc: Nikolay Borisov Link: https://lore.kernel.org/r/3833812ea63e7fdbe36bf8b932e63f70d18e2a2a.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 17 +++-------------- arch/x86/kernel/cpu/bugs.c | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be5b3a48c8cde..5f7a86f240db7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2563,27 +2563,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also -choice - prompt "Clear branch history" +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" depends on CPU_SUP_INTEL - default SPECTRE_BHI_ON + default y help Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks where the branch history buffer is poisoned to speculatively steer indirect branches. See -config SPECTRE_BHI_ON - bool "on" - help - Equivalent to setting spectre_bhi=on command line parameter. -config SPECTRE_BHI_OFF - bool "off" - help - Equivalent to setting spectre_bhi=off command line parameter. - -endchoice - endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0b0fea179b0d7..6d69123de3660 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1605,7 +1605,7 @@ enum bhi_mitigations { }; static enum bhi_mitigations bhi_mitigation __ro_after_init = - IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; static int __init spectre_bhi_parse_cmdline(char *str) { -- GitLab From 2bc1796f8eeba648c06d68ece1ea86974363f6d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 Apr 2024 18:50:03 +0300 Subject: [PATCH 1743/2290] drm/i915/cdclk: Fix CDCLK programming order when pipes are active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7b1f6b5aaec0f849e19c3e99d4eea75876853cdd upstream. Currently we always reprogram CDCLK from the intel_set_cdclk_pre_plane_update() when using squash/crawl. The code only works correctly for the cd2x update or full modeset cases, and it was simply never updated to deal with squash/crawl. If the CDCLK frequency is increasing we must reprogram it before we do anything else that might depend on the new higher frequency, and conversely we must not decrease the frequency until everything that might still depend on the old higher frequency has been dealt with. Since cdclk_state->pipe is only relevant when doing a cd2x update we can't use it to determine the correct sequence during squash/crawl. To that end introduce cdclk_state->disable_pipes which simply indicates that we must perform the update while the pipes are disable (ie. during intel_set_cdclk_pre_plane_update()). Otherwise we use the same old vs. new CDCLK frequency comparsiong as for cd2x updates. The only remaining problem case is when the voltage_level needs to increase due to a DDI port, but the CDCLK frequency is decreasing (and not all pipes are being disabled). The current approach will not bump the voltage level up until after the port has already been enabled, which is too late. But we'll take care of that case separately. v2: Don't break the "must disable pipes case" v3: Keep the on stack 'pipe' for future use Cc: stable@vger.kernel.org Fixes: d62686ba3b54 ("drm/i915/adl_p: CDCLK crawl support for ADL") Reviewed-by: Uma Shankar Reviewed-by: Gustavo Sousa Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240402155016.13733-2-ville.syrjala@linux.intel.com (cherry picked from commit 3aecee90ac12a351905f12dda7643d5b0676d6ca) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_cdclk.c | 7 +++++-- drivers/gpu/drm/i915/display/intel_cdclk.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 25dcdde5feb69..5147718f38d6a 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2152,7 +2152,7 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe == INVALID_PIPE || + if (new_cdclk_state->disable_pipes || old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2181,7 +2181,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe != INVALID_PIPE && + if (!new_cdclk_state->disable_pipes && old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2634,6 +2634,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa return NULL; cdclk_state->pipe = INVALID_PIPE; + cdclk_state->disable_pipes = false; return &cdclk_state->base; } @@ -2793,6 +2794,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; + new_cdclk_state->disable_pipes = true; + drm_dbg_kms(&dev_priv->drm, "Modeset required for cdclk change\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index c674879a84a58..c4b3e5938bb3f 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -51,6 +51,9 @@ struct intel_cdclk_state { /* bitmask of active pipes */ u8 active_pipes; + + /* update cdclk with pipes disabled */ + bool disable_pipes; }; int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); -- GitLab From 29bd4d05f2c5b9e84935874d1c0c2c97a8d91a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Apr 2024 00:34:27 +0300 Subject: [PATCH 1744/2290] drm/i915: Disable port sync when bigjoiner is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0653d501409eeb9f1deb7e4c12e4d0d2c9f1cba1 upstream. The current modeset sequence can't handle port sync and bigjoiner at the same time. Refuse port sync when bigjoiner is needed, at least until we fix the modeset sequence. v2: Add a FIXME (Vandite) Cc: stable@vger.kernel.org Tested-by: Vidya Srinivas Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20240404213441.17637-4-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit b37e1347b991459c38c56ec2476087854a4f720b) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_ddi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 706e2d956801d..76277eb3eb252 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3683,7 +3683,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1, static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1, const struct intel_crtc_state *crtc_state2) { + /* + * FIXME the modeset sequence is currently wrong and + * can't deal with bigjoiner + port sync at the same time. + */ return crtc_state1->hw.active && crtc_state2->hw.active && + !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes && crtc_state1->output_types == crtc_state2->output_types && crtc_state1->output_format == crtc_state2->output_format && crtc_state1->lane_count == crtc_state2->lane_count && -- GitLab From 7cc89dbcb8eabd01ae264dc76a5939ea96c63c90 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 14 Feb 2024 17:55:54 +0530 Subject: [PATCH 1745/2290] drm/amdgpu: Reset dGPU if suspend got aborted commit 8b2be55f4d6c1099d7f629b0ed7535a5be788c83 upstream. For SOC21 ASICs, there is an issue in re-enabling PM features if a suspend got aborted. In such cases, reset the device during resume phase. This is a workaround till a proper solution is finalized. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc21.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 56af7b5abac14..da11f78826ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -780,10 +780,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } -- GitLab From 90819b1830bc26dfacedae28fce72e1f21133b38 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 23 Mar 2024 20:46:53 -0400 Subject: [PATCH 1746/2290] drm/amdgpu: always force full reset for SOC21 commit 65ff8092e4802f96d87d3d7cde146961f5228265 upstream. There are cases where soft reset seems to succeed, but does not, so always use mode1/2 for now. Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index da11f78826ea8..56cc59629d96b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -460,10 +460,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } -- GitLab From 724fbc7c0cb88729ec914a3d778f466e5ea3dbb7 Mon Sep 17 00:00:00 2001 From: Fudongwang Date: Tue, 26 Mar 2024 16:03:16 +0800 Subject: [PATCH 1747/2290] drm/amd/display: fix disable otg wa logic in DCN316 commit cf79814cb0bf5749b9f0db53ca231aa540c02768 upstream. [Why] Wrong logic cause screen corruption. [How] Port logic from DCN35/314. Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Fudongwang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 187f5b27fdc80..29d2003fb7129 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -112,20 +112,25 @@ static int dcn316_get_active_display_cnt_wa( return display_count; } -static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || - dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc)) { if (disable) { - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); + reset_sync_context_for_pipe(dc, context, i); } else pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); @@ -222,11 +227,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn316_disable_otg_wa(clk_mgr_base, context, true); + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn316_disable_otg_wa(clk_mgr_base, context, false); + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } -- GitLab From 6741e066ec7633450d3186946035c1f80c4226b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Apr 2024 11:18:29 +0200 Subject: [PATCH 1748/2290] Linux 6.1.87 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240415141946.165870434@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Kelsey Steele Tested-by: Mark Brown Tested-by: Ron Economos Tested-by: Yann Sionneau Tested-by: Jon Hunter Tested-by: Mateusz Jończyk Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index baddd8ed81868..e46a57006a34f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 86 +SUBLEVEL = 87 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 1e053399681ccc87f8dc73af4ffe09b66d9fef95 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 7 Apr 2024 22:28:02 -0400 Subject: [PATCH 1749/2290] drm/vmwgfx: Enable DMA mappings with SEV [ Upstream commit 4c08f01934ab67d1d283d5cbaa52b923abcfe4cd ] Enable DMA mappings in vmwgfx after TTM has been fixed in commit 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem") This enables full guest-backed memory support and in particular allows usage of screen targets as the presentation mechanism. Signed-off-by: Zack Rusin Reported-by: Ye Li Tested-by: Ye Li Fixes: 3b0d6458c705 ("drm/vmwgfx: Refuse DMA operation when SEV encryption is active") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v6.6+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240408022802.358641-1-zack.rusin@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9d7a1b710f48f..53f63ad656a41 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -663,11 +663,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - /* TTM currently doesn't fully support SEV encryption. */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -EINVAL; - - if (vmw_force_coherent) + /* + * When running with SEV we always want dma mappings, because + * otherwise ttm tt pool pages will bounce through swiotlb running + * out of available space. + */ + if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT)) dev_priv->map_mode = vmw_dma_alloc_coherent; else if (vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; -- GitLab From 87f8aac740f17ee140ff90dfb592954596492aec Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Feb 2023 09:06:53 +0800 Subject: [PATCH 1750/2290] drm/amdgpu: fix incorrect active rb bitmap for gfx11 [ Upstream commit f9c35f4fffc6cb5bbb23f546f48c045aef012518 ] GFX v11 changes RB_BACKEND_DISABLE related registers from per SA to global ones. The approach to query active rb bitmap needs to be changed accordingly. Query per SE setting returns wrong active RB bitmap especially in the case when some of SA are disabled. With the new approach, driver will generate the active rb bitmap based on active SA bitmap and global active RB bitmap. Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher Stable-dep-of: bbca7f414ae9 ("drm/amdgpu: fix incorrect number of active RBs for gfx11") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 78 +++++++++++++++++--------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 66a6f7a37ebcf..ec40f88da00c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1531,44 +1531,70 @@ static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); } -static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) +static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) { - u32 data, mask; + u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; + + gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); + gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, + CC_GC_SA_UNIT_DISABLE, + SA_DISABLE); + gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); + gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, + GC_USER_SA_UNIT_DISABLE, + SA_DISABLE); + sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines); - data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); - data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); +} - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; +static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; + u32 rb_mask; - mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); + gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, + CC_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, + GC_USER_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines); - return (~data) & mask; + return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); } static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) { - int i, j; - u32 data; - u32 active_rbs = 0; - u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se; + u32 rb_bitmap_width_per_sa; + u32 max_sa; + u32 active_sa_bitmap; + u32 global_active_rb_bitmap; + u32 active_rb_bitmap = 0; + u32 i; - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); - data = gfx_v11_0_get_rb_active_bitmap(adev); - active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * - rb_bitmap_width_per_sh); - } + /* query sa bitmap from SA_UNIT_DISABLE registers */ + active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); + /* query rb bitmap from RB_BACKEND_DISABLE registers */ + global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); + + /* generate active rb bitmap according to active sa bitmap */ + max_sa = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se; + rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + for (i = 0; i < max_sa; i++) { + if (active_sa_bitmap & (1 << i)) + active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - adev->gfx.config.backend_enable_mask = active_rbs; - adev->gfx.config.num_rbs = hweight32(active_rbs); + active_rb_bitmap |= global_active_rb_bitmap; + adev->gfx.config.backend_enable_mask = active_rb_bitmap; + adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } #define DEFAULT_SH_MEM_BASES (0x6000) -- GitLab From 01c227f5a72b08f7da4fa06c67a60555d21c17f9 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 3 Apr 2024 17:28:44 +0800 Subject: [PATCH 1751/2290] drm/amdgpu: fix incorrect number of active RBs for gfx11 [ Upstream commit bbca7f414ae9a12ea231cdbafd79c607e3337ea8 ] The RB bitmap should be global active RB bitmap & active RB bitmap based on active SA. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ec40f88da00c3..5a5787bfbce7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1592,7 +1592,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } -- GitLab From 121a83be215ba5e53c4dbe5ce696927f91eefee1 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 21 Mar 2024 13:49:43 -0400 Subject: [PATCH 1752/2290] drm/amd/display: Do not recursively call manual trigger programming [ Upstream commit 953927587f37b731abdeabe46ad44a3b3ec67a52 ] [WHY&HOW] We should not be recursively calling the manual trigger programming function when FAMS is not in use. Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Acked-by: Hamza Mahfooz Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index a974f86e718a8..37c645a882dd8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -216,9 +216,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc) OTG_V_TOTAL_MAX_SEL, 1, OTG_FORCE_LOCK_ON_EVENT, 0, OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); } } -- GitLab From cbe7b911e0c9139347b6ec5ba088283a699cb95f Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 5 Apr 2024 15:55:51 +0300 Subject: [PATCH 1753/2290] io_uring: Fix io_cqring_wait() not restoring sigmask on get_timespec64() failure Commit 978e5c19dfefc271e5550efba92fcef0d3f62864 upstream. This bug was introduced in commit 950e79dd7313 ("io_uring: minor io_cqring_wait() optimization"), which was made in preparation for adc8682ec690 ("io_uring: Add support for napi_busy_poll"). The latter got reverted in cb3182167325 ("Revert "io_uring: Add support for napi_busy_poll""), so simply undo the former as well. Cc: stable@vger.kernel.org Fixes: 950e79dd7313 ("io_uring: minor io_cqring_wait() optimization") Signed-off-by: Alexey Izbyshev Link: https://lore.kernel.org/r/20240405125551.237142-1-izbyshev@ispras.ru Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 68f1b6f8699a6..958c3b6190205 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2426,6 +2426,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return 0; } while (ret > 0); + if (uts) { + struct timespec64 ts; + + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); + } + if (sig) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) @@ -2439,14 +2447,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - if (uts) { - struct timespec64 ts; - - if (get_timespec64(&ts, uts)) - return -EFAULT; - timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); - } - init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); -- GitLab From 45eec81eaca624f9278f8be9e62f8ae1b78e46a0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 10 Apr 2024 12:38:13 -0400 Subject: [PATCH 1754/2290] SUNRPC: Fix rpcgss_context trace event acceptor field commit a4833e3abae132d613ce7da0e0c9a9465d1681fa upstream. The rpcgss_context trace event acceptor field is a dynamically sized string that records the "data" parameter. But this parameter is also dependent on the "len" field to determine the size of the data. It needs to use __string_len() helper macro where the length can be passed in. It also incorrectly uses strncpy() to save it instead of __assign_str(). As these macros can change, it is not wise to open code them in trace events. As of commit c759e609030c ("tracing: Remove __assign_str_len()"), __assign_str() can be used for both __string() and __string_len() fields. Before that commit, __assign_str_len() is required to be used. This needs to be noted for backporting. (In actuality, commit c1fa617caeb0 ("tracing: Rework __assign_str() and __string() to not duplicate getting the string") is the commit that makes __string_str_len() obsolete). Cc: stable@vger.kernel.org Fixes: 0c77668ddb4e ("SUNRPC: Introduce trace points in rpc_auth_gss.ko") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rpcgss.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 3f121eed369e8..894d9fc8bd94a 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -587,7 +587,7 @@ TRACE_EVENT(rpcgss_context, __field(unsigned int, timeout) __field(u32, window_size) __field(int, len) - __string(acceptor, data) + __string_len(acceptor, data, len) ), TP_fast_assign( @@ -596,7 +596,7 @@ TRACE_EVENT(rpcgss_context, __entry->timeout = timeout; __entry->window_size = window_size; __entry->len = len; - strncpy(__get_str(acceptor), data, len); + __assign_str(acceptor, data); ), TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s", -- GitLab From bcdd9ce78d66c601d60dd23888af4f4ee9d743bd Mon Sep 17 00:00:00 2001 From: Yuanhe Shu Date: Mon, 26 Feb 2024 11:18:16 +0800 Subject: [PATCH 1755/2290] selftests/ftrace: Limit length in subsystem-enable tests commit 1a4ea83a6e67f1415a1f17c1af5e9c814c882bb5 upstream. While sched* events being traced and sched* events continuously happen, "[xx] event tracing - enable/disable with subsystem level files" would not stop as on some slower systems it seems to take forever. Select the first 100 lines of output would be enough to judge whether there are more than 3 types of sched events. Fixes: 815b18ea66d6 ("ftracetest: Add basic event tracing test cases") Cc: stable@vger.kernel.org Signed-off-by: Yuanhe Shu Acked-by: Masami Hiramatsu (Google) Acked-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- .../selftests/ftrace/test.d/event/subsystem-enable.tc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b1ede62498667..b7c8f29c09a97 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -18,7 +18,7 @@ echo 'sched:*' > set_event yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +29,7 @@ echo 1 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -40,7 +40,7 @@ echo 0 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then fail "any of scheduler events should not be recorded" fi -- GitLab From 07b37f227c8daa27e68f57b1c691fab34a06731e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 17 Apr 2024 13:38:29 +0200 Subject: [PATCH 1756/2290] random: handle creditable entropy from atomic process context commit e871abcda3b67d0820b4182ebe93435624e9c6a4 upstream. The entropy accounting changes a static key when the RNG has initialized, since it only ever initializes once. Static key changes, however, cannot be made from atomic context, so depending on where the last creditable entropy comes from, the static key change might need to be deferred to a worker. Previously the code used the execute_in_process_context() helper function, which accounts for whether or not the caller is in_interrupt(). However, that doesn't account for the case where the caller is actually in process context but is holding a spinlock. This turned out to be the case with input_handle_event() in drivers/input/input.c contributing entropy: [] die+0xa8/0x2fc [] bug_handler+0x44/0xec [] brk_handler+0x90/0x144 [] do_debug_exception+0xa0/0x148 [] el1_dbg+0x60/0x7c [] el1h_64_sync_handler+0x38/0x90 [] el1h_64_sync+0x64/0x6c [] __might_resched+0x1fc/0x2e8 [] __might_sleep+0x44/0x7c [] cpus_read_lock+0x1c/0xec [] static_key_enable+0x14/0x38 [] crng_set_ready+0x14/0x28 [] execute_in_process_context+0xb8/0xf8 [] _credit_init_bits+0x118/0x1dc [] add_timer_randomness+0x264/0x270 [] add_input_randomness+0x38/0x48 [] input_handle_event+0x2b8/0x490 [] input_event+0x6c/0x98 According to Guoyong, it's not really possible to refactor the various drivers to never hold a spinlock there. And in_atomic() isn't reliable. So, rather than trying to be too fancy, just punt the change in the static key to a workqueue always. There's basically no drawback of doing this, as the code already needed to account for the static key not changing immediately, and given that it's just an optimization, there's not exactly a hurry to change the static key right away, so deferal is fine. Reported-by: Guoyong Wang Cc: stable@vger.kernel.org Fixes: f5bda35fba61 ("random: use static branch for crng_ready()") Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5d1c8e1c99b5b..fd57eb372d492 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -683,7 +683,7 @@ static void extract_entropy(void *buf, size_t len) static void __cold _credit_init_bits(size_t bits) { - static struct execute_work set_ready; + static DECLARE_WORK(set_ready, crng_set_ready); unsigned int new, orig, add; unsigned long flags; @@ -699,8 +699,8 @@ static void __cold _credit_init_bits(size_t bits) if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ - if (static_key_initialized) - execute_in_process_context(crng_set_ready, &set_ready); + if (static_key_initialized && system_unbound_wq) + queue_work(system_unbound_wq, &set_ready); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); @@ -870,8 +870,8 @@ void __init random_init(void) /* * If we were initialized by the cpu or bootloader before jump labels - * are initialized, then we should enable the static branch here, where - * it's guaranteed that jump labels have been initialized. + * or workqueues are initialized, then we should enable the static + * branch here, where it's guaranteed that these have been initialized. */ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) crng_set_ready(NULL); -- GitLab From add0ff34863e973fb3a968e3620bb302a5990f4b Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Wed, 17 Apr 2024 10:55:13 +0200 Subject: [PATCH 1757/2290] net: usb: ax88179_178a: avoid writing the mac address before first reading commit 56f78615bcb1c3ba58a5d9911bad3d9185cf141b upstream. After the commit d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets"), reset operation, in which the default mac address from the device is read, is not executed from bind operation and the random address, that is pregenerated just in case, is direclty written the first time in the device, so the default one from the device is not even read. This writing is not dangerous because is volatile and the default mac address is not missed. In order to avoid this and keep the simplification to have only one reset and reduce the delays, restore the reset from bind operation and remove the reset that is commanded from open operation. The behavior is the same but everything is ready for usbnet_probe. Tested with ASIX AX88179 USB Gigabit Ethernet devices. Restore the old behavior for the rest of possible devices because I don't have the hardware to test. cc: stable@vger.kernel.org # 6.6+ Fixes: d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets") Reported-by: Jarkko Palviainen Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20240417085524.219532-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e0e9b4c53cb02..3078511f76083 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1317,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); + ax88179_reset(dev); + return 0; } @@ -1695,7 +1697,6 @@ static const struct driver_info ax88179_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1708,7 +1709,6 @@ static const struct driver_info ax88178a_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, -- GitLab From 704edc9252f4988ae1ad7dafa23d0db8d90d7190 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 5 Mar 2024 15:35:06 +0100 Subject: [PATCH 1758/2290] drm/i915/vma: Fix UAF on destroy against retire race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0e45882ca829b26b915162e8e86dbb1095768e9e upstream. Object debugging tools were sporadically reporting illegal attempts to free a still active i915 VMA object when parking a GT believed to be idle. [161.359441] ODEBUG: free active (active state 0) object: ffff88811643b958 object type: i915_active hint: __i915_vma_active+0x0/0x50 [i915] [161.360082] WARNING: CPU: 5 PID: 276 at lib/debugobjects.c:514 debug_print_object+0x80/0xb0 ... [161.360304] CPU: 5 PID: 276 Comm: kworker/5:2 Not tainted 6.5.0-rc1-CI_DRM_13375-g003f860e5577+ #1 [161.360314] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 [161.360322] Workqueue: i915-unordered __intel_wakeref_put_work [i915] [161.360592] RIP: 0010:debug_print_object+0x80/0xb0 ... [161.361347] debug_object_free+0xeb/0x110 [161.361362] i915_active_fini+0x14/0x130 [i915] [161.361866] release_references+0xfe/0x1f0 [i915] [161.362543] i915_vma_parked+0x1db/0x380 [i915] [161.363129] __gt_park+0x121/0x230 [i915] [161.363515] ____intel_wakeref_put_last+0x1f/0x70 [i915] That has been tracked down to be happening when another thread is deactivating the VMA inside __active_retire() helper, after the VMA's active counter has been already decremented to 0, but before deactivation of the VMA's object is reported to the object debugging tool. We could prevent from that race by serializing i915_active_fini() with __active_retire() via ref->tree_lock, but that wouldn't stop the VMA from being used, e.g. from __i915_vma_retire() called at the end of __active_retire(), after that VMA has been already freed by a concurrent i915_vma_destroy() on return from the i915_active_fini(). Then, we should rather fix the issue at the VMA level, not in i915_active. Since __i915_vma_parked() is called from __gt_park() on last put of the GT's wakeref, the issue could be addressed by holding the GT wakeref long enough for __active_retire() to complete before that wakeref is released and the GT parked. I believe the issue was introduced by commit d93939730347 ("drm/i915: Remove the vma refcount") which moved a call to i915_active_fini() from a dropped i915_vma_release(), called on last put of the removed VMA kref, to i915_vma_parked() processing path called on last put of a GT wakeref. However, its visibility to the object debugging tool was suppressed by a bug in i915_active that was fixed two weeks later with commit e92eb246feb9 ("drm/i915/active: Fix missing debug object activation"). A VMA associated with a request doesn't acquire a GT wakeref by itself. Instead, it depends on a wakeref held directly by the request's active intel_context for a GT associated with its VM, and indirectly on that intel_context's engine wakeref if the engine belongs to the same GT as the VMA's VM. Those wakerefs are released asynchronously to VMA deactivation. Fix the issue by getting a wakeref for the VMA's GT when activating it, and putting that wakeref only after the VMA is deactivated. However, exclude global GTT from that processing path, otherwise the GPU never goes idle. Since __i915_vma_retire() may be called from atomic contexts, use async variant of wakeref put. Also, to avoid circular locking dependency, take care of acquiring the wakeref before VM mutex when both are needed. v7: Add inline comments with justifications for: - using untracked variants of intel_gt_pm_get/put() (Nirmoy), - using async variant of _put(), - not getting the wakeref in case of a global GTT, - always getting the first wakeref outside vm->mutex. v6: Since __i915_vma_active/retire() callbacks are not serialized, storing a wakeref tracking handle inside struct i915_vma is not safe, and there is no other good place for that. Use untracked variants of intel_gt_pm_get/put_async(). v5: Replace "tile" with "GT" across commit description (Rodrigo), - avoid mentioning multi-GT case in commit description (Rodrigo), - explain why we need to take a temporary wakeref unconditionally inside i915_vma_pin_ww() (Rodrigo). v4: Refresh on top of commit 5e4e06e4087e ("drm/i915: Track gt pm wakerefs") (Andi), - for more easy backporting, split out removal of former insufficient workarounds and move them to separate patches (Nirmoy). - clean up commit message and description a bit. v3: Identify root cause more precisely, and a commit to blame, - identify and drop former workarounds, - update commit message and description. v2: Get the wakeref before VM mutex to avoid circular locking dependency, - drop questionable Fixes: tag. Fixes: d93939730347 ("drm/i915: Remove the vma refcount") Closes: https://gitlab.freedesktop.org/drm/intel/issues/8875 Signed-off-by: Janusz Krzysztofik Cc: Thomas Hellström Cc: Nirmoy Das Cc: Andi Shyti Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Nirmoy Das Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240305143747.335367-6-janusz.krzysztofik@linux.intel.com (cherry picked from commit f3c71b2ded5c4367144a810ef25f998fd1d6c381) Signed-off-by: Rodrigo Vivi Signed-off-by: Janusz Krzysztofik Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_vma.c | 42 +++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index c8ad8f37e5cfe..58a03da16a10f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -32,6 +32,7 @@ #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" #include "i915_drv.h" @@ -98,12 +99,34 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref) static int __i915_vma_active(struct i915_active *ref) { - return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_tryget(vma)) + return -ENOENT; + + /* + * Exclude global GTT VMA from holding a GT wakeref + * while active, otherwise GPU never goes idle. + */ + if (!i915_vma_is_ggtt(vma)) + intel_gt_pm_get(vma->vm->gt); + + return 0; } static void __i915_vma_retire(struct i915_active *ref) { - i915_vma_put(active_to_vma(ref)); + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we can be called from atomic contexts, + * use an async variant of intel_gt_pm_put(). + */ + intel_gt_pm_put_async(vma->vm->gt); + } + + i915_vma_put(vma); } static struct i915_vma * @@ -1365,7 +1388,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; struct i915_vma_resource *vma_res = NULL; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref; unsigned int bound; int err; @@ -1385,8 +1408,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* + * In case of a global GTT, we must hold a runtime-pm wakeref + * while global PTEs are updated. In other cases, we hold + * the rpm reference while the VMA is active. Since runtime + * resume may require allocations, which are forbidden inside + * vm->mutex, get the first rpm wakeref outside of the mutex. + */ + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { /* lock VM */ @@ -1522,8 +1551,7 @@ err_fence: if (work) dma_fence_work_commit_imm(&work->base); err_rpm: - if (wakeref) - intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); if (moving) dma_fence_put(moving); -- GitLab From f5603f9e13ad2fdacdfea57d303de3b0cdb3cb6f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:07 +0200 Subject: [PATCH 1759/2290] x86/efi: Drop EFI stub .bss from .data section [ Commit 5f51c5d0e905608ba7be126737f7c84a793ae1aa upstream ] Now that the EFI stub always zero inits its BSS section upon entry, there is no longer a need to place the BSS symbols carried by the stub into the .data section. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-18-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/vmlinux.lds.S | 1 - drivers/firmware/efi/libstub/Makefile | 7 ------- 2 files changed, 8 deletions(-) diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 112b2375d021b..32892e81bf61b 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -46,7 +46,6 @@ SECTIONS _data = . ; *(.data) *(.data.*) - *(.bss.efistub) _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 473ef18421db0..748781c257871 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -102,13 +102,6 @@ lib-y := $(patsubst %.o,%.stub.o,$(lib-y)) # https://bugs.llvm.org/show_bug.cgi?id=46480 STUBCOPY_FLAGS-y += --remove-section=.note.gnu.property -# -# For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the -# .bss section, so the .bss section of the EFI stub needs to be included in the -# .data section of the compressed kernel to ensure initialization. Rename the -# .bss section here so it's easy to pick out in the linker script. -# -STUBCOPY_FLAGS-$(CONFIG_X86) += --rename-section .bss=.bss.efistub,load,alloc STUBCOPY_RELOC-$(CONFIG_X86_32) := R_386_32 STUBCOPY_RELOC-$(CONFIG_X86_64) := R_X86_64_64 -- GitLab From f46e0e9fbef59927f0926a427bd841ede177837a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:08 +0200 Subject: [PATCH 1760/2290] x86/efi: Disregard setup header of loaded image [ Commit 7e50262229faad0c7b8c54477cd1c883f31cc4a7 upstream ] The native EFI entrypoint does not take a struct boot_params from the loader, but instead, it constructs one from scratch, using the setup header data placed at the start of the image. This setup header is placed in a way that permits legacy loaders to manipulate the contents (i.e., to pass the kernel command line or the address and size of an initial ramdisk), but EFI boot does not use it in that way - it only copies the contents that were placed there at build time, but EFI loaders will not (and should not) manipulate the setup header to configure the boot. (Commit 63bf28ceb3ebbe76 "efi: x86: Wipe setup_data on pure EFI boot" deals with some of the fallout of using setup_data in a way that breaks EFI boot.) Given that none of the non-zero values that are copied from the setup header into the EFI stub's struct boot_params are relevant to the boot now that the EFI stub no longer enters via the legacy decompressor, the copy can be omitted altogether. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-19-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 46 ++++--------------------- 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index dc50dda40239e..c592ecd40dab4 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -426,9 +426,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg) { - struct boot_params *boot_params; - struct setup_header *hdr; - void *image_base; + static struct boot_params boot_params __page_aligned_bss; + struct setup_header *hdr = &boot_params.hdr; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; @@ -449,30 +448,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_exit(handle, status); } - image_base = efi_table_attr(image, image_base); - - status = efi_allocate_pages(sizeof(struct boot_params), - (unsigned long *)&boot_params, ULONG_MAX); - if (status != EFI_SUCCESS) { - efi_err("Failed to allocate lowmem for boot params\n"); - efi_exit(handle, status); - } - - memset(boot_params, 0x0, sizeof(struct boot_params)); - - hdr = &boot_params->hdr; - - /* Copy the setup header from the second sector to boot_params */ - memcpy(&hdr->jump, image_base + 512, - sizeof(struct setup_header) - offsetof(struct setup_header, jump)); - - /* - * Fill out some of the header fields ourselves because the - * EFI firmware loader doesn't load the first sector. - */ + /* Assign the setup_header fields that the kernel actually cares about */ hdr->root_flags = 1; hdr->vid_mode = 0xffff; - hdr->boot_flag = 0xAA55; hdr->type_of_loader = 0x21; @@ -481,25 +459,13 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (!cmdline_ptr) goto fail; - efi_set_u64_split((unsigned long)cmdline_ptr, - &hdr->cmd_line_ptr, &boot_params->ext_cmd_line_ptr); - - hdr->ramdisk_image = 0; - hdr->ramdisk_size = 0; - - /* - * Disregard any setup data that was provided by the bootloader: - * setup_data could be pointing anywhere, and we have no way of - * authenticating or validating the payload. - */ - hdr->setup_data = 0; + efi_set_u64_split((unsigned long)cmdline_ptr, &hdr->cmd_line_ptr, + &boot_params.ext_cmd_line_ptr); - efi_stub_entry(handle, sys_table_arg, boot_params); + efi_stub_entry(handle, sys_table_arg, &boot_params); /* not reached */ fail: - efi_free(sizeof(struct boot_params), (unsigned long)boot_params); - efi_exit(handle, status); } -- GitLab From 1800c9628ece13571935d59ce502391c0f86868c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:09 +0200 Subject: [PATCH 1761/2290] x86/efistub: Reinstate soft limit for initrd loading [ Commit decd347c2a75d32984beb8807d470b763a53b542 upstream ] Commit 8117961d98fb2 ("x86/efi: Disregard setup header of loaded image") dropped the memcopy of the image's setup header into the boot_params struct provided to the core kernel, on the basis that EFI boot does not need it and should rely only on a single protocol to interface with the boot chain. It is also a prerequisite for being able to increase the section alignment to 4k, which is needed to enable memory protections when running in the boot services. So only the setup_header fields that matter to the core kernel are populated explicitly, and everything else is ignored. One thing was overlooked, though: the initrd_addr_max field in the setup_header is not used by the core kernel, but it is used by the EFI stub itself when it loads the initrd, where its default value of INT_MAX is used as the soft limit for memory allocation. This means that, in the old situation, the initrd was virtually always loaded in the lower 2G of memory, but now, due to initrd_addr_max being 0x0, the initrd may end up anywhere in memory. This should not be an issue principle, as most systems can deal with this fine. However, it does appear to tickle some problems in older UEFI implementations, where the memory ends up being corrupted, resulting in errors when unpacking the initramfs. So set the initrd_addr_max field to INT_MAX like it was before. Fixes: 8117961d98fb2 ("x86/efi: Disregard setup header of loaded image") Reported-by: Radek Podgorny Closes: https://lore.kernel.org/all/a99a831a-8ad5-4cb0-bff9-be637311f771@podgorny.cz Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index c592ecd40dab4..1f5edcb6339ae 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -453,6 +453,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, hdr->vid_mode = 0xffff; hdr->type_of_loader = 0x21; + hdr->initrd_addr_max = INT_MAX; /* Convert unicode cmdline to ascii */ cmdline_ptr = efi_convert_cmdline(image, &options_size); -- GitLab From 0db16d1dce52faa2129c49b43d9b0edd9930d08f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:10 +0200 Subject: [PATCH 1762/2290] x86/efi: Drop alignment flags from PE section headers [ Commit bfab35f552ab3dd6d017165bf9de1d1d20f198cc upstream ] The section header flags for alignment are documented in the PE/COFF spec as being applicable to PE object files only, not to PE executables such as the Linux bzImage, so let's drop them from the PE header. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-20-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index d31982509654d..38b611eb1a3c1 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -208,8 +208,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_CODE | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE | \ - IMAGE_SCN_ALIGN_16BYTES # Characteristics + IMAGE_SCN_MEM_EXECUTE # Characteristics # # The EFI application loader requires a relocation section @@ -229,8 +228,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE | \ - IMAGE_SCN_ALIGN_1BYTES # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics #ifdef CONFIG_EFI_MIXED # @@ -248,8 +246,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE | \ - IMAGE_SCN_ALIGN_1BYTES # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics #endif # @@ -270,8 +267,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_CODE | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE | \ - IMAGE_SCN_ALIGN_16BYTES # Characteristics + IMAGE_SCN_MEM_EXECUTE # Characteristics .set section_count, (. - section_table) / 40 #endif /* CONFIG_EFI_STUB */ -- GitLab From f31f521ad27e3e476a58c07bf0825c4441317dba Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:11 +0200 Subject: [PATCH 1763/2290] x86/boot: Remove the 'bugger off' message [ Commit 768171d7ebbce005210e1cf8456f043304805c15 upstream ] Ancient (pre-2003) x86 kernels could boot from a floppy disk straight from the BIOS, using a small real mode boot stub at the start of the image where the BIOS would expect the boot record (or boot block) to appear. Due to its limitations (kernel size < 1 MiB, no support for IDE, USB or El Torito floppy emulation), this support was dropped, and a Linux aware bootloader is now always required to boot the kernel from a legacy BIOS. To smoothen this transition, the boot stub was not removed entirely, but replaced with one that just prints an error message telling the user to install a bootloader. As it is unlikely that anyone doing direct floppy boot with such an ancient kernel is going to upgrade to v6.5+ and expect that this boot method still works, printing this message is kind of pointless, and so it should be possible to remove the logic that emits it. Let's free up this space so it can be used to expand the PE header in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Acked-by: H. Peter Anvin (Intel) Link: https://lore.kernel.org/r/20230912090051.4014114-21-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 49 ------------------------------------------ arch/x86/boot/setup.ld | 7 +++--- 2 files changed, 4 insertions(+), 52 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 38b611eb1a3c1..b8d241e57b49a 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -38,63 +38,14 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ .code16 .section ".bstext", "ax" - - .global bootsect_start -bootsect_start: #ifdef CONFIG_EFI_STUB # "MZ", MS-DOS header .word MZ_MAGIC -#endif - - # Normalize the start address - ljmp $BOOTSEG, $start2 - -start2: - movw %cs, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - xorw %sp, %sp - sti - cld - - movw $bugger_off_msg, %si - -msg_loop: - lodsb - andb %al, %al - jz bs_die - movb $0xe, %ah - movw $7, %bx - int $0x10 - jmp msg_loop - -bs_die: - # Allow the user to press a key, then reboot - xorw %ax, %ax - int $0x16 - int $0x19 - - # int 0x19 should never return. In case it does anyway, - # invoke the BIOS reset code... - ljmp $0xf000,$0xfff0 - -#ifdef CONFIG_EFI_STUB .org 0x3c # # Offset to the PE header. # .long pe_header -#endif /* CONFIG_EFI_STUB */ - - .section ".bsdata", "a" -bugger_off_msg: - .ascii "Use a boot loader.\r\n" - .ascii "\n" - .ascii "Remove disk and press any key to reboot...\r\n" - .byte 0 - -#ifdef CONFIG_EFI_STUB pe_header: .long PE_MAGIC diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 49546c247ae25..b11c45b9e51ed 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -10,10 +10,11 @@ ENTRY(_start) SECTIONS { . = 0; - .bstext : { *(.bstext) } - .bsdata : { *(.bsdata) } + .bstext : { + *(.bstext) + . = 495; + } =0xffffffff - . = 495; .header : { *(.header) } .entrytext : { *(.entrytext) } .inittext : { *(.inittext) } -- GitLab From 5c3e92ad493dd46e0b2345825924d68f5364e93b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:12 +0200 Subject: [PATCH 1764/2290] x86/boot: Omit compression buffer from PE/COFF image memory footprint [ Commit 8eace5b3555606e684739bef5bcdfcfe68235257 upstream ] Now that the EFI stub decompresses the kernel and hands over to the decompressed image directly, there is no longer a need to provide a decompression buffer as part of the .BSS allocation of the PE/COFF image. It also means the PE/COFF image can be loaded anywhere in memory, and setting the preferred image base is unnecessary. So drop the handling of this from the header and from the build tool. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-22-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 6 ++--- arch/x86/boot/tools/build.c | 50 +++++-------------------------------- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b8d241e57b49a..98dd4c36cccac 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -89,12 +89,10 @@ optional_header: #endif extra_header_fields: - # PE specification requires ImageBase to be 64k aligned - .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff #ifdef CONFIG_X86_32 - .long image_base # ImageBase + .long 0 # ImageBase #else - .quad image_base # ImageBase + .quad 0 # ImageBase #endif .long 0x20 # SectionAlignment .long 0x20 # FileAlignment diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index bd247692b7017..0354c223e3549 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -65,7 +65,6 @@ static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long kernel_info; static unsigned long startup_64; -static unsigned long _ehead; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -229,27 +228,14 @@ static void update_pecoff_setup_and_reloc(unsigned int size) #endif } -static void update_pecoff_text(unsigned int text_start, unsigned int file_sz, - unsigned int init_sz) +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) { unsigned int pe_header; unsigned int text_sz = file_sz - text_start; - unsigned int bss_sz = init_sz - file_sz; + unsigned int bss_sz = _end - text_sz; pe_header = get_unaligned_le32(&buf[0x3c]); - /* - * The PE/COFF loader may load the image at an address which is - * misaligned with respect to the kernel_alignment field in the setup - * header. - * - * In order to avoid relocating the kernel to correct the misalignment, - * add slack to allow the buffer to be aligned within the declared size - * of the image. - */ - bss_sz += CONFIG_PHYSICAL_ALIGN; - init_sz += CONFIG_PHYSICAL_ALIGN; - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. @@ -257,7 +243,7 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz, put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]); /* Size of image */ - put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + put_unaligned_le32(file_sz + bss_sz, &buf[pe_header + 0x50]); /* * Address of entry point for PE/COFF executable @@ -308,8 +294,7 @@ static void efi_stub_entry_update(void) static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, - unsigned int file_sz, - unsigned int init_sz) {} + unsigned int file_sz) {} static inline void efi_stub_defaults(void) {} static inline void efi_stub_entry_update(void) {} @@ -360,7 +345,6 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, kernel_info); PARSE_ZOFS(p, startup_64); - PARSE_ZOFS(p, _ehead); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); @@ -371,7 +355,7 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors, init_sz; + unsigned int i, sz, setup_sectors; int c; u32 sys_size; struct stat sb; @@ -442,31 +426,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); - init_sz = get_unaligned_le32(&buf[0x260]); -#ifdef CONFIG_EFI_STUB - /* - * The decompression buffer will start at ImageBase. When relocating - * the compressed kernel to its end, we must ensure that the head - * section does not get overwritten. The head section occupies - * [i, i + _ehead), and the destination is [init_sz - _end, init_sz). - * - * At present these should never overlap, because 'i' is at most 32k - * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the - * calculation of INIT_SIZE in boot/header.S ensures that - * 'init_sz - _end' is at least 64k. - * - * For future-proofing, increase init_sz if necessary. - */ - - if (init_sz - _end < i + _ehead) { - init_sz = (i + _ehead + _end + 4095) & ~4095; - put_unaligned_le32(init_sz, &buf[0x260]); - } -#endif - update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); efi_stub_entry_update(); - /* Update kernel_info offset. */ put_unaligned_le32(kernel_info, &buf[0x268]); -- GitLab From 33d38d9b35e98993b11ec607d8a65570cf246d81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:13 +0200 Subject: [PATCH 1765/2290] x86/boot: Drop redundant code setting the root device [ Commit 7448e8e5d15a3c4df649bf6d6d460f78396f7e1e upstream ] The root device defaults to 0,0 and is no longer configurable at build time [0], so there is no need for the build tool to ever write to this field. [0] 079f85e624189292 ("x86, build: Do not set the root_dev field in bzImage") This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-23-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 98dd4c36cccac..f63bf3ec68693 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -235,7 +235,7 @@ root_flags: .word ROOT_RDONLY syssize: .long 0 /* Filled in by build.c */ ram_size: .word 0 /* Obsolete */ vid_mode: .word SVGA_MODE -root_dev: .word 0 /* Filled in by build.c */ +root_dev: .word 0 /* Default to major/minor 0/0 */ boot_flag: .word 0xAA55 # offset 512, entry point diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 0354c223e3549..efa4e9c7d7135 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -40,10 +40,6 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; -#define DEFAULT_MAJOR_ROOT 0 -#define DEFAULT_MINOR_ROOT 0 -#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT) - /* Minimal number of setup sectors */ #define SETUP_SECT_MIN 5 #define SETUP_SECT_MAX 64 @@ -399,9 +395,6 @@ int main(int argc, char ** argv) update_pecoff_setup_and_reloc(i); - /* Set the default root device */ - put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); - /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); if (fd < 0) -- GitLab From 67b8dc50890bd1b2a87cc8636672c6186193b17d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:14 +0200 Subject: [PATCH 1766/2290] x86/boot: Drop references to startup_64 [ Commit b618d31f112bea3d2daea19190d63e567f32a4db upstream ] The x86 boot image generation tool assign a default value to startup_64 and subsequently parses the actual value from zoffset.h but it never actually uses the value anywhere. So remove this code. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-25-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/tools/build.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9e38ffaadb5d9..10ea284697888 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index efa4e9c7d7135..10b0207a6b189 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -60,7 +60,6 @@ static unsigned long efi64_stub_entry; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long kernel_info; -static unsigned long startup_64; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -264,7 +263,6 @@ static void efi_stub_defaults(void) efi_pe_entry = 0x10; #else efi_pe_entry = 0x210; - startup_64 = 0x200; #endif } @@ -340,7 +338,6 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, kernel_info); - PARSE_ZOFS(p, startup_64); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); -- GitLab From e1380c923ccd7f68327372b329dc7c83428405fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:15 +0200 Subject: [PATCH 1767/2290] x86/boot: Grab kernel_info offset from zoffset header directly [ Commit 2e765c02dcbfc2a8a4527c621a84b9502f6b9bd2 upstream ] Instead of parsing zoffset.h and poking the kernel_info offset value into the header from the build tool, just grab the value directly in the asm file that describes this header. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-11-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f63bf3ec68693..becf39d8115c5 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -525,7 +525,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -kernel_info_offset: .long 0 # Filled in by build.c +kernel_info_offset: .long ZO_kernel_info # End of setup header ##################################################### diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 10b0207a6b189..14ef13fe7ab02 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -59,7 +59,6 @@ static unsigned long efi32_stub_entry; static unsigned long efi64_stub_entry; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; -static unsigned long kernel_info; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -337,7 +336,6 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi64_stub_entry); PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); - PARSE_ZOFS(p, kernel_info); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); @@ -419,8 +417,6 @@ int main(int argc, char ** argv) update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); efi_stub_entry_update(); - /* Update kernel_info offset. */ - put_unaligned_le32(kernel_info, &buf[0x268]); crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) -- GitLab From f9d68334dd9913f4eb7b49b298473e4d60f4fce3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:16 +0200 Subject: [PATCH 1768/2290] x86/boot: Set EFI handover offset directly in header asm [ Commit eac956345f99dda3d68f4ae6cf7b494105e54780 upstream ] The offsets of the EFI handover entrypoints are available to the assembler when constructing the header, so there is no need to set them from the build tool afterwards. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-12-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 18 +++++++++++++++++- arch/x86/boot/tools/build.c | 24 ------------------------ 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index becf39d8115c5..34ab46b891e3e 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -523,8 +523,24 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # define INIT_SIZE VO_INIT_SIZE #endif + .macro __handover_offset +#ifndef CONFIG_EFI_HANDOVER_PROTOCOL + .long 0 +#elif !defined(CONFIG_X86_64) + .long ZO_efi32_stub_entry +#else + /* Yes, this is really how we defined it :( */ + .long ZO_efi64_stub_entry - 0x200 +#ifdef CONFIG_EFI_MIXED + .if ZO_efi32_stub_entry != ZO_efi64_stub_entry - 0x200 + .error "32-bit and 64-bit EFI entry points do not match" + .endif +#endif +#endif + .endm + init_size: .long INIT_SIZE # kernel initialization size -handover_offset: .long 0 # Filled in by build.c +handover_offset: __handover_offset kernel_info_offset: .long ZO_kernel_info # End of setup header ##################################################### diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 14ef13fe7ab02..0694975431645 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -55,8 +55,6 @@ u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_COMPAT_RESERVE 0x0 #endif -static unsigned long efi32_stub_entry; -static unsigned long efi64_stub_entry; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long _end; @@ -265,31 +263,12 @@ static void efi_stub_defaults(void) #endif } -static void efi_stub_entry_update(void) -{ - unsigned long addr = efi32_stub_entry; - -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL -#ifdef CONFIG_X86_64 - /* Yes, this is really how we defined it :( */ - addr = efi64_stub_entry - 0x200; -#endif - -#ifdef CONFIG_EFI_MIXED - if (efi32_stub_entry != addr) - die("32-bit and 64-bit EFI entry points do not match\n"); -#endif -#endif - put_unaligned_le32(addr, &buf[0x264]); -} - #else static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, unsigned int file_sz) {} static inline void efi_stub_defaults(void) {} -static inline void efi_stub_entry_update(void) {} static inline int reserve_pecoff_reloc_section(int c) { @@ -332,8 +311,6 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi32_stub_entry); - PARSE_ZOFS(p, efi64_stub_entry); PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, _end); @@ -416,7 +393,6 @@ int main(int argc, char ** argv) update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); - efi_stub_entry_update(); crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) -- GitLab From 1c754c6ec978354afa44977a8183c0656815cf03 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:17 +0200 Subject: [PATCH 1769/2290] x86/boot: Define setup size in linker script [ Commit 093ab258e3fb1d1d3afdfd4a69403d44ce90e360 upstream ] The setup block contains the real mode startup code that is used when booting from a legacy BIOS, along with the boot_params/setup_data that is used by legacy x86 bootloaders to pass the command line and initial ramdisk parameters, among other things. The setup block also contains the PE/COFF header of the entire combined image, which includes the compressed kernel image, the decompressor and the EFI stub. This PE header describes the layout of the executable image in memory, and currently, the fact that the setup block precedes it makes it rather fiddly to get the right values into the right place in the final image. Let's make things a bit easier by defining the setup_size in the linker script so it can be referenced from the asm code directly, rather than having to rely on the build tool to calculate it. For the time being, add 64 bytes of fixed padding for the .reloc and .compat sections - this will be removed in a subsequent patch after the PE/COFF header has been reorganized. This change has no impact on the resulting bzImage binary when configured with CONFIG_EFI_MIXED=y. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-13-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 2 +- arch/x86/boot/setup.ld | 4 ++++ arch/x86/boot/tools/build.c | 6 ------ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 34ab46b891e3e..6dddf469ca60d 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -230,7 +230,7 @@ sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ .globl hdr hdr: -setup_sects: .byte 0 /* Filled in by build.c */ + .byte setup_sects - 1 root_flags: .word ROOT_RDONLY syssize: .long 0 /* Filled in by build.c */ ram_size: .word 0 /* Obsolete */ diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index b11c45b9e51ed..9bd5c1ada599d 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -39,6 +39,10 @@ SECTIONS .signature : { setup_sig = .; LONG(0x5a5aaa55) + + /* Reserve some extra space for the reloc and compat sections */ + setup_size = ALIGN(ABSOLUTE(.) + 64, 512); + setup_sects = ABSOLUTE(setup_size / 512); } diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 0694975431645..745d64b6d9303 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -48,12 +48,7 @@ typedef unsigned int u32; u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_RELOC_RESERVE 0x20 - -#ifdef CONFIG_EFI_MIXED #define PECOFF_COMPAT_RESERVE 0x20 -#else -#define PECOFF_COMPAT_RESERVE 0x0 -#endif static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; @@ -388,7 +383,6 @@ int main(int argc, char ** argv) #endif /* Patch the setup code with the appropriate size parameters */ - buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); -- GitLab From 1fa0a2147590930f50b9098a9d81f60c6d096d9e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:18 +0200 Subject: [PATCH 1770/2290] x86/boot: Derive file size from _edata symbol [ Commit aeb92067f6ae994b541d7f9752fe54ed3d108bcc upstream ] Tweak the linker script so that the value of _edata represents the decompressor binary's file size rounded up to the appropriate alignment. This removes the need to calculate it in the build tool, and will make it easier to refer to the file size from the header directly in subsequent changes to the PE header layout. While adding _edata to the sed regex that parses the compressed vmlinux's symbol list, tweak the regex a bit for conciseness. This change has no impact on the resulting bzImage binary when configured with CONFIG_EFI_STUB=y. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-14-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/compressed/vmlinux.lds.S | 3 +++ arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 30 ++++++-------------------- 4 files changed, 12 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 10ea284697888..b92e00836f699 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_edata\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 32892e81bf61b..aa7e9848cd8f0 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -46,6 +46,9 @@ SECTIONS _data = . ; *(.data) *(.data.*) + + /* Add 4 bytes of extra space for a CRC-32 checksum */ + . = ALIGN(. + 4, 0x20); _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6dddf469ca60d..b43b551308558 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -232,7 +232,7 @@ sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ hdr: .byte setup_sects - 1 root_flags: .word ROOT_RDONLY -syssize: .long 0 /* Filled in by build.c */ +syssize: .long ZO__edata / 16 ram_size: .word 0 /* Obsolete */ vid_mode: .word SVGA_MODE root_dev: .word 0 /* Default to major/minor 0/0 */ diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 745d64b6d9303..e792c6c5a634d 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -52,6 +52,7 @@ u8 buf[SETUP_SECT_MAX*512]; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; +static unsigned long _edata; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -308,6 +309,7 @@ static void parse_zoffset(char *fname) while (p && *p) { PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); + PARSE_ZOFS(p, _edata); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); @@ -320,7 +322,6 @@ int main(int argc, char ** argv) { unsigned int i, sz, setup_sectors; int c; - u32 sys_size; struct stat sb; FILE *file, *dest; int fd; @@ -368,24 +369,14 @@ int main(int argc, char ** argv) die("Unable to open `%s': %m", argv[2]); if (fstat(fd, &sb)) die("Unable to stat `%s': %m", argv[2]); - sz = sb.st_size; + if (_edata != sb.st_size) + die("Unexpected file size `%s': %u != %u", argv[2], _edata, + sb.st_size); + sz = _edata - 4; kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); if (kernel == MAP_FAILED) die("Unable to mmap '%s': %m", argv[2]); - /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ - sys_size = (sz + 15 + 4) / 16; -#ifdef CONFIG_EFI_STUB - /* - * COFF requires minimum 32-byte alignment of sections, and - * adding a signature is problematic without that alignment. - */ - sys_size = (sys_size + 1) & ~1; -#endif - - /* Patch the setup code with the appropriate size parameters */ - put_unaligned_le32(sys_size, &buf[0x1f4]); - - update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + update_pecoff_text(setup_sectors * 512, i + _edata); crc = partial_crc32(buf, i, crc); @@ -397,13 +388,6 @@ int main(int argc, char ** argv) if (fwrite(kernel, 1, sz, dest) != sz) die("Writing kernel failed"); - /* Add padding leaving 4 bytes for the checksum */ - while (sz++ < (sys_size*16) - 4) { - crc = partial_crc32_one('\0', crc); - if (fwrite("\0", 1, 1, dest) != 1) - die("Writing padding failed"); - } - /* Write the CRC */ put_unaligned_le32(crc, buf); if (fwrite(buf, 1, 4, dest) != 4) -- GitLab From d03399c3e2526e40ffa69c4c06be71e1a09e2361 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:19 +0200 Subject: [PATCH 1771/2290] x86/boot: Construct PE/COFF .text section from assembler [ Commit efa089e63b56bdc5eca754b995cb039dd7a5457e upstream ] Now that the size of the setup block is visible to the assembler, it is possible to populate the PE/COFF header fields from the asm code directly, instead of poking the values into the binary using the build tool. This will make it easier to reorganize the section layout without having to tweak the build tool in lockstep. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-15-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 22 ++++++----------- arch/x86/boot/tools/build.c | 47 ------------------------------------- 2 files changed, 7 insertions(+), 62 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b43b551308558..f8f609fb87095 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -74,14 +74,12 @@ optional_header: .byte 0x02 # MajorLinkerVersion .byte 0x14 # MinorLinkerVersion - # Filled in by build.c - .long 0 # SizeOfCode + .long setup_size + ZO__end - 0x200 # SizeOfCode .long 0 # SizeOfInitializedData .long 0 # SizeOfUninitializedData - # Filled in by build.c - .long 0x0000 # AddressOfEntryPoint + .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint .long 0x0200 # BaseOfCode #ifdef CONFIG_X86_32 @@ -104,10 +102,7 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - # - # The size of the bzImage is written in tools/build.c - # - .long 0 # SizeOfImage + .long setup_size + ZO__end # SizeOfImage .long 0x200 # SizeOfHeaders .long 0 # CheckSum @@ -198,18 +193,15 @@ section_table: IMAGE_SCN_MEM_DISCARDABLE # Characteristics #endif - # - # The offset & size fields are filled in by build.c. - # .ascii ".text" .byte 0 .byte 0 .byte 0 - .long 0 - .long 0x0 # startup_{32,64} - .long 0 # Size of initialized data + .long ZO__end + .long setup_size + .long ZO__edata # Size of initialized data # on disk - .long 0x0 # startup_{32,64} + .long setup_size .long 0 # PointerToRelocations .long 0 # PointerToLineNumbers .word 0 # NumberOfRelocations diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index e792c6c5a634d..9712f27e32c10 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,10 +50,8 @@ u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_RELOC_RESERVE 0x20 #define PECOFF_COMPAT_RESERVE 0x20 -static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long _edata; -static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -216,32 +214,6 @@ static void update_pecoff_setup_and_reloc(unsigned int size) #endif } -static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) -{ - unsigned int pe_header; - unsigned int text_sz = file_sz - text_start; - unsigned int bss_sz = _end - text_sz; - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* - * Size of code: Subtract the size of the first sector (512 bytes) - * which includes the header. - */ - put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]); - - /* Size of image */ - put_unaligned_le32(file_sz + bss_sz, &buf[pe_header + 0x50]); - - /* - * Address of entry point for PE/COFF executable - */ - put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]); - - update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz, - text_sz, text_start); -} - static int reserve_pecoff_reloc_section(int c) { /* Reserve 0x20 bytes for .reloc section */ @@ -249,22 +221,9 @@ static int reserve_pecoff_reloc_section(int c) return PECOFF_RELOC_RESERVE; } -static void efi_stub_defaults(void) -{ - /* Defaults for old kernel */ -#ifdef CONFIG_X86_32 - efi_pe_entry = 0x10; -#else - efi_pe_entry = 0x210; -#endif -} - #else static inline void update_pecoff_setup_and_reloc(unsigned int size) {} -static inline void update_pecoff_text(unsigned int text_start, - unsigned int file_sz) {} -static inline void efi_stub_defaults(void) {} static inline int reserve_pecoff_reloc_section(int c) { @@ -307,10 +266,8 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, _edata); - PARSE_ZOFS(p, _end); p = strchr(p, '\n'); while (p && (*p == '\r' || *p == '\n')) @@ -328,8 +285,6 @@ int main(int argc, char ** argv) void *kernel; u32 crc = 0xffffffffUL; - efi_stub_defaults(); - if (argc != 5) usage(); parse_zoffset(argv[3]); @@ -376,8 +331,6 @@ int main(int argc, char ** argv) kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); if (kernel == MAP_FAILED) die("Unable to mmap '%s': %m", argv[2]); - update_pecoff_text(setup_sectors * 512, i + _edata); - crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) -- GitLab From 43b1920588fa7852ced850c8351f10bdde393140 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:20 +0200 Subject: [PATCH 1772/2290] x86/boot: Drop PE/COFF .reloc section [ Commit fa5750521e0a4efbc1af05223da9c4bbd6c21c83 upstream ] Ancient buggy EFI loaders may have required a .reloc section to be present at some point in time, but this has not been true for a long time so the .reloc section can just be dropped. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-16-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 20 -------------------- arch/x86/boot/setup.ld | 4 ++-- arch/x86/boot/tools/build.c | 34 +++++----------------------------- 3 files changed, 7 insertions(+), 51 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f8f609fb87095..a01e55ce506fe 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -154,26 +154,6 @@ section_table: IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_EXECUTE # Characteristics - # - # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. The .reloc - # offset & size fields are filled in by build.c. - # - .ascii ".reloc" - .byte 0 - .byte 0 - .long 0 - .long 0 - .long 0 # SizeOfRawData - .long 0 # PointerToRawData - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers - .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE # Characteristics - #ifdef CONFIG_EFI_MIXED # # The offset & size fields are filled in by build.c. diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 9bd5c1ada599d..6d389499565c2 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -40,8 +40,8 @@ SECTIONS setup_sig = .; LONG(0x5a5aaa55) - /* Reserve some extra space for the reloc and compat sections */ - setup_size = ALIGN(ABSOLUTE(.) + 64, 512); + /* Reserve some extra space for the compat section */ + setup_size = ALIGN(ABSOLUTE(.) + 32, 512); setup_sects = ABSOLUTE(setup_size / 512); } diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 9712f27e32c10..faccff9743a3a 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -47,7 +47,6 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -#define PECOFF_RELOC_RESERVE 0x20 #define PECOFF_COMPAT_RESERVE 0x20 static unsigned long efi32_pe_entry; @@ -180,24 +179,13 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz update_pecoff_section_header_fields(section_name, offset, size, size, offset); } -static void update_pecoff_setup_and_reloc(unsigned int size) +static void update_pecoff_setup(unsigned int size) { u32 setup_offset = 0x200; - u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE; -#ifdef CONFIG_EFI_MIXED - u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE; -#endif - u32 setup_size = reloc_offset - setup_offset; + u32 compat_offset = size - PECOFF_COMPAT_RESERVE; + u32 setup_size = compat_offset - setup_offset; update_pecoff_section_header(".setup", setup_offset, setup_size); - update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); - - /* - * Modify .reloc section contents with a single entry. The - * relocation is applied to offset 10 of the relocation section. - */ - put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); - put_unaligned_le32(10, &buf[reloc_offset + 4]); #ifdef CONFIG_EFI_MIXED update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE); @@ -214,21 +202,10 @@ static void update_pecoff_setup_and_reloc(unsigned int size) #endif } -static int reserve_pecoff_reloc_section(int c) -{ - /* Reserve 0x20 bytes for .reloc section */ - memset(buf+c, 0, PECOFF_RELOC_RESERVE); - return PECOFF_RELOC_RESERVE; -} - #else -static inline void update_pecoff_setup_and_reloc(unsigned int size) {} +static inline void update_pecoff_setup(unsigned int size) {} -static inline int reserve_pecoff_reloc_section(int c) -{ - return 0; -} #endif /* CONFIG_EFI_STUB */ static int reserve_pecoff_compat_section(int c) @@ -307,7 +284,6 @@ int main(int argc, char ** argv) fclose(file); c += reserve_pecoff_compat_section(c); - c += reserve_pecoff_reloc_section(c); /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; @@ -316,7 +292,7 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); - update_pecoff_setup_and_reloc(i); + update_pecoff_setup(i); /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); -- GitLab From 581f5d5e02effba6e6be2c2d9df6dc62ad839760 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:21 +0200 Subject: [PATCH 1773/2290] x86/boot: Split off PE/COFF .data section [ Commit 34951f3c28bdf6481d949a20413b2ce7693687b2 upstream ] Describe the code and data of the decompressor binary using separate .text and .data PE/COFF sections, so that we will be able to map them using restricted permissions once we increase the section and file alignment sufficiently. This avoids the need for memory mappings that are writable and executable at the same time, which is something that is best avoided for security reasons. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-17-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/header.S | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b92e00836f699..3c1b3520361c7 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_edata\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_e\?data\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index a01e55ce506fe..178252cdccf59 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -74,9 +74,9 @@ optional_header: .byte 0x02 # MajorLinkerVersion .byte 0x14 # MinorLinkerVersion - .long setup_size + ZO__end - 0x200 # SizeOfCode + .long ZO__data # SizeOfCode - .long 0 # SizeOfInitializedData + .long ZO__end - ZO__data # SizeOfInitializedData .long 0 # SizeOfUninitializedData .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint @@ -177,9 +177,9 @@ section_table: .byte 0 .byte 0 .byte 0 - .long ZO__end + .long ZO__data .long setup_size - .long ZO__edata # Size of initialized data + .long ZO__data # Size of initialized data # on disk .long setup_size .long 0 # PointerToRelocations @@ -190,6 +190,17 @@ section_table: IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_EXECUTE # Characteristics + .ascii ".data\0\0\0" + .long ZO__end - ZO__data # VirtualSize + .long setup_size + ZO__data # VirtualAddress + .long ZO__edata - ZO__data # SizeOfRawData + .long setup_size + ZO__data # PointerToRawData + + .long 0, 0, 0 + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE # Characteristics + .set section_count, (. - section_table) / 40 #endif /* CONFIG_EFI_STUB */ -- GitLab From c4421279b6c278efe129bde7abc64af59ea2dfbd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:22 +0200 Subject: [PATCH 1774/2290] x86/boot: Increase section and file alignment to 4k/512 [ Commit 3e3eabe26dc88692d34cf76ca0e0dd331481cc15 upstream ] Align x86 with other EFI architectures, and increase the section alignment to the EFI page size (4k), so that firmware is able to honour the section permission attributes and map code read-only and data non-executable. There are a number of requirements that have to be taken into account: - the sign tools get cranky when there are gaps between sections in the file view of the image - the virtual offset of each section must be aligned to the image's section alignment - the file offset *and size* of each section must be aligned to the image's file alignment - the image size must be aligned to the section alignment - each section's virtual offset must be greater than or equal to the size of the headers. In order to meet all these requirements, while avoiding the need for lots of padding to accommodate the .compat section, the latter is placed at an arbitrary offset towards the end of the image, but aligned to the minimum file alignment (512 bytes). The space before the .text section is therefore distributed between the PE header, the .setup section and the .compat section, leaving no gaps in the file coverage, making the signing tools happy. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-18-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/vmlinux.lds.S | 4 +- arch/x86/boot/header.S | 75 ++++++++++++--------- arch/x86/boot/setup.ld | 7 +- arch/x86/boot/tools/build.c | 90 +------------------------- 4 files changed, 51 insertions(+), 125 deletions(-) diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index aa7e9848cd8f0..bcf0e4e4c98e8 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -42,13 +42,13 @@ SECTIONS *(.rodata.*) _erodata = . ; } - .data : { + .data : ALIGN(0x1000) { _data = . ; *(.data) *(.data.*) /* Add 4 bytes of extra space for a CRC-32 checksum */ - . = ALIGN(. + 4, 0x20); + . = ALIGN(. + 4, 0x200); _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 178252cdccf59..6264bbf54fbca 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -36,6 +36,9 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define ROOT_RDONLY 1 #endif + .set salign, 0x1000 + .set falign, 0x200 + .code16 .section ".bstext", "ax" #ifdef CONFIG_EFI_STUB @@ -81,7 +84,7 @@ optional_header: .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint - .long 0x0200 # BaseOfCode + .long setup_size # BaseOfCode #ifdef CONFIG_X86_32 .long 0 # data #endif @@ -92,8 +95,8 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x20 # SectionAlignment - .long 0x20 # FileAlignment + .long salign # SectionAlignment + .long falign # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion @@ -102,9 +105,10 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - .long setup_size + ZO__end # SizeOfImage + .long setup_size + ZO__end + pecompat_vsize + # SizeOfImage - .long 0x200 # SizeOfHeaders + .long salign # SizeOfHeaders .long 0 # CheckSum .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application) #ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES @@ -135,44 +139,51 @@ extra_header_fields: # Section table section_table: - # - # The offset & size fields are filled in by build.c. - # .ascii ".setup" .byte 0 .byte 0 - .long 0 - .long 0x0 # startup_{32,64} - .long 0 # Size of initialized data - # on disk - .long 0x0 # startup_{32,64} - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ + .long setup_size - salign # VirtualSize + .long salign # VirtualAddress + .long pecompat_fstart - salign # SizeOfRawData + .long salign # PointerToRawData + + .long 0, 0, 0 + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics #ifdef CONFIG_EFI_MIXED - # - # The offset & size fields are filled in by build.c. - # .asciz ".compat" - .long 0 - .long 0x0 - .long 0 # Size of initialized data - # on disk - .long 0x0 - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers + + .long 8 # VirtualSize + .long setup_size + ZO__end # VirtualAddress + .long pecompat_fsize # SizeOfRawData + .long pecompat_fstart # PointerToRawData + + .long 0, 0, 0 .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_DISCARDABLE # Characteristics -#endif + /* + * Put the IA-32 machine type and the associated entry point address in + * the .compat section, so loaders can figure out which other execution + * modes this image supports. + */ + .pushsection ".pecompat", "a", @progbits + .balign falign + .set pecompat_vsize, salign + .globl pecompat_fstart +pecompat_fstart: + .byte 0x1 # Version + .byte 8 # Size + .word IMAGE_FILE_MACHINE_I386 # PE machine type + .long setup_size + ZO_efi32_pe_entry # Entrypoint + .popsection +#else + .set pecompat_vsize, 0 + .set pecompat_fstart, setup_size +#endif .ascii ".text" .byte 0 .byte 0 diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 6d389499565c2..83bb7efad8ae7 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -36,16 +36,17 @@ SECTIONS . = ALIGN(16); .data : { *(.data*) } + .pecompat : { *(.pecompat) } + PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); + .signature : { setup_sig = .; LONG(0x5a5aaa55) - /* Reserve some extra space for the compat section */ - setup_size = ALIGN(ABSOLUTE(.) + 32, 512); + setup_size = ALIGN(ABSOLUTE(.), 4096); setup_sects = ABSOLUTE(setup_size / 512); } - . = ALIGN(16); .bss : { diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index faccff9743a3a..10311d77c67f8 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -47,9 +47,6 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -#define PECOFF_COMPAT_RESERVE 0x20 - -static unsigned long efi32_pe_entry; static unsigned long _edata; /*----------------------------------------------------------------------*/ @@ -136,85 +133,6 @@ static void usage(void) die("Usage: build setup system zoffset.h image"); } -#ifdef CONFIG_EFI_STUB - -static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) -{ - unsigned int pe_header; - unsigned short num_sections; - u8 *section; - - pe_header = get_unaligned_le32(&buf[0x3c]); - num_sections = get_unaligned_le16(&buf[pe_header + 6]); - -#ifdef CONFIG_X86_32 - section = &buf[pe_header + 0xa8]; -#else - section = &buf[pe_header + 0xb8]; -#endif - - while (num_sections > 0) { - if (strncmp((char*)section, section_name, 8) == 0) { - /* section header size field */ - put_unaligned_le32(size, section + 0x8); - - /* section header vma field */ - put_unaligned_le32(vma, section + 0xc); - - /* section header 'size of initialised data' field */ - put_unaligned_le32(datasz, section + 0x10); - - /* section header 'file offset' field */ - put_unaligned_le32(offset, section + 0x14); - - break; - } - section += 0x28; - num_sections--; - } -} - -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) -{ - update_pecoff_section_header_fields(section_name, offset, size, size, offset); -} - -static void update_pecoff_setup(unsigned int size) -{ - u32 setup_offset = 0x200; - u32 compat_offset = size - PECOFF_COMPAT_RESERVE; - u32 setup_size = compat_offset - setup_offset; - - update_pecoff_section_header(".setup", setup_offset, setup_size); - -#ifdef CONFIG_EFI_MIXED - update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE); - - /* - * Put the IA-32 machine type (0x14c) and the associated entry point - * address in the .compat section, so loaders can figure out which other - * execution modes this image supports. - */ - buf[compat_offset] = 0x1; - buf[compat_offset + 1] = 0x8; - put_unaligned_le16(0x14c, &buf[compat_offset + 2]); - put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]); -#endif -} - -#else - -static inline void update_pecoff_setup(unsigned int size) {} - -#endif /* CONFIG_EFI_STUB */ - -static int reserve_pecoff_compat_section(int c) -{ - /* Reserve 0x20 bytes for .compat section */ - memset(buf+c, 0, PECOFF_COMPAT_RESERVE); - return PECOFF_COMPAT_RESERVE; -} - /* * Parse zoffset.h and find the entry points. We could just #include zoffset.h * but that would mean tools/build would have to be rebuilt every time. It's @@ -243,7 +161,6 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, _edata); p = strchr(p, '\n'); @@ -283,17 +200,14 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); - c += reserve_pecoff_compat_section(c); - /* Pad unused space with zeros */ - setup_sectors = (c + 511) / 512; + setup_sectors = (c + 4095) / 4096; + setup_sectors *= 8; if (setup_sectors < SETUP_SECT_MIN) setup_sectors = SETUP_SECT_MIN; i = setup_sectors*512; memset(buf+c, 0, i-c); - update_pecoff_setup(i); - /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); if (fd < 0) -- GitLab From d327e961573fc335af0ae8a160302205327e1f4e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:23 +0200 Subject: [PATCH 1775/2290] x86/efistub: Use 1:1 file:memory mapping for PE/COFF .compat section [ Commit 1ad55cecf22f05f1c884adf63cc09d3c3e609ebf upstream ] The .compat section is a dummy PE section that contains the address of the 32-bit entrypoint of the 64-bit kernel image if it is bootable from 32-bit firmware (i.e., CONFIG_EFI_MIXED=y) This section is only 8 bytes in size and is only referenced from the loader, and so it is placed at the end of the memory view of the image, to avoid the need for padding it to 4k, which is required for sections appearing in the middle of the image. Unfortunately, this violates the PE/COFF spec, and even if most EFI loaders will work correctly (including the Tianocore reference implementation), PE loaders do exist that reject such images, on the basis that both the file and memory views of the file contents should be described by the section headers in a monotonically increasing manner without leaving any gaps. So reorganize the sections to avoid this issue. This results in a slight padding overhead (< 4k) which can be avoided if desired by disabling CONFIG_EFI_MIXED (which is only needed in rare cases these days) Fixes: 3e3eabe26dc8 ("x86/boot: Increase section and file alignment to 4k/512") Reported-by: Mike Beaton Link: https://lkml.kernel.org/r/CAHzAAWQ6srV6LVNdmfbJhOwhBw5ZzxxZZ07aHt9oKkfYAdvuQQ%40mail.gmail.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 14 ++++++-------- arch/x86/boot/setup.ld | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6264bbf54fbca..7593339b529a2 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -105,8 +105,7 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - .long setup_size + ZO__end + pecompat_vsize - # SizeOfImage + .long setup_size + ZO__end # SizeOfImage .long salign # SizeOfHeaders .long 0 # CheckSum @@ -142,7 +141,7 @@ section_table: .ascii ".setup" .byte 0 .byte 0 - .long setup_size - salign # VirtualSize + .long pecompat_fstart - salign # VirtualSize .long salign # VirtualAddress .long pecompat_fstart - salign # SizeOfRawData .long salign # PointerToRawData @@ -155,8 +154,8 @@ section_table: #ifdef CONFIG_EFI_MIXED .asciz ".compat" - .long 8 # VirtualSize - .long setup_size + ZO__end # VirtualAddress + .long pecompat_fsize # VirtualSize + .long pecompat_fstart # VirtualAddress .long pecompat_fsize # SizeOfRawData .long pecompat_fstart # PointerToRawData @@ -171,17 +170,16 @@ section_table: * modes this image supports. */ .pushsection ".pecompat", "a", @progbits - .balign falign - .set pecompat_vsize, salign + .balign salign .globl pecompat_fstart pecompat_fstart: .byte 0x1 # Version .byte 8 # Size .word IMAGE_FILE_MACHINE_I386 # PE machine type .long setup_size + ZO_efi32_pe_entry # Entrypoint + .byte 0x0 # Sentinel .popsection #else - .set pecompat_vsize, 0 .set pecompat_fstart, setup_size #endif .ascii ".text" diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 83bb7efad8ae7..3a2d1360abb01 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -24,6 +24,9 @@ SECTIONS .text : { *(.text .text.*) } .text32 : { *(.text32) } + .pecompat : { *(.pecompat) } + PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); + . = ALIGN(16); .rodata : { *(.rodata*) } @@ -36,9 +39,6 @@ SECTIONS . = ALIGN(16); .data : { *(.data*) } - .pecompat : { *(.pecompat) } - PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); - .signature : { setup_sig = .; LONG(0x5a5aaa55) -- GitLab From 20dc656b06570a29274cbc669fa9356ccae24f91 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 19 Apr 2024 10:11:24 +0200 Subject: [PATCH 1776/2290] x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros [ Commit 82328227db8f0b9b5f77bb5afcd47e59d0e4d08f upstream ] Other architectures and the common mm/ use P*D_MASK, and P*D_SIZE. Remove the duplicated P*D_PAGE_MASK and P*D_PAGE_SIZE which are only used in x86/*. Signed-off-by: Pasha Tatashin Signed-off-by: Borislav Petkov Reviewed-by: Anshuman Khandual Acked-by: Mike Rapoport Link: https://lore.kernel.org/r/20220516185202.604654-1-tatashin@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/page_types.h | 12 +++--------- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/mm/mem_encrypt_boot.S | 4 ++-- arch/x86/mm/mem_encrypt_identity.c | 18 +++++++++--------- arch/x86/mm/pat/set_memory.c | 6 +++--- arch/x86/mm/pti.c | 2 +- 7 files changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index a506a411474d4..86bd4311daf8a 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -11,20 +11,14 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 19a0207e529fe..56a917df410d3 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_SIZE) - a; if (iommu_size < 64*1024*1024) { pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 84adf12a76d3c..34580e1a4cdb9 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr, load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); /* Is the address not 2M aligned? */ - if (load_delta & ~PMD_PAGE_MASK) + if (load_delta & ~PMD_MASK) for (;;); /* Include the SME encryption mask in the fixup value */ diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 9de3d900bc927..e25288ee33c2d 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute) * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) - * - intermediate copy buffer (PMD_PAGE_SIZE) + * - intermediate copy buffer (PMD_SIZE) * R8 - physical address of the pagetables to use for encryption */ @@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy) wbinvd /* Invalidate any cache entries */ /* Copy/encrypt up to 2MB at a time */ - movq $PMD_PAGE_SIZE, %r12 + movq $PMD_SIZE, %r12 1: cmpq %r12, %r9 jnb 2f diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 06ccbd36e8dcd..f7dfc3f89921f 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -93,7 +93,7 @@ struct sme_populate_pgd_data { * section is 2MB aligned to allow for simple pagetable setup using only * PMD entries (see vmlinux.lds.S). */ -static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); +static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; @@ -197,8 +197,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); - ppd->vaddr += PMD_PAGE_SIZE; - ppd->paddr += PMD_PAGE_SIZE; + ppd->vaddr += PMD_SIZE; + ppd->paddr += PMD_SIZE; } } @@ -224,11 +224,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, vaddr_end = ppd->vaddr_end; /* If start is not 2MB aligned, create PTE entries */ - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE); __sme_map_range_pte(ppd); /* Create PMD entries */ - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + ppd->vaddr_end = vaddr_end & PMD_MASK; __sme_map_range_pmd(ppd); /* If end is not 2MB aligned, create PTE entries */ @@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) /* Physical addresses gives us the identity mapped virtual addresses */ kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); + kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * executable encryption area size: * stack page (PAGE_SIZE) * encryption routine page (PAGE_SIZE) - * intermediate copy buffer (PMD_PAGE_SIZE) + * intermediate copy buffer (PMD_SIZE) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ execute_start = workarea_start; - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; /* @@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); + workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE); /* * Set the address to the start of where newly created pagetable diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 5f0ce77a259d8..68d4f328f1696 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -747,11 +747,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) switch (level) { case PG_LEVEL_1G: phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PUD_PAGE_MASK; + offset = virt_addr & ~PUD_MASK; break; case PG_LEVEL_2M: phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PMD_PAGE_MASK; + offset = virt_addr & ~PMD_MASK; break; default: phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; @@ -1041,7 +1041,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, case PG_LEVEL_1G: ref_prot = pud_pgprot(*(pud_t *)kpte); ref_pfn = pud_pfn(*(pud_t *)kpte); - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pfninc = PMD_SIZE >> PAGE_SHIFT; lpaddr = address & PUD_MASK; lpinc = PMD_SIZE; /* diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ffe3b3a087fea..78414c6d1b5ed 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void) * of the image. */ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE); /* * This clears _PAGE_GLOBAL from the entire kernel image. -- GitLab From bbcd0534a3eb7fcedd3e7e56eaf6dc837d252776 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Fri, 19 Apr 2024 10:11:25 +0200 Subject: [PATCH 1777/2290] x86/head/64: Add missing __head annotation to startup_64_load_idt() [ Commit 7f6874eddd81cb2ed784642a7a4321671e158ffe upstream ] This function is currently only used in the head code and is only called from startup_64_setup_env(). Although it would be inlined by the compiler, it would be better to mark it as __head too in case it doesn't. Signed-off-by: Hou Wenlong Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/efcc5b5e18af880e415d884e072bf651c1fa7c34.1689130310.git.houwenlong.hwl@antgroup.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 34580e1a4cdb9..78f3f67565384 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -588,7 +588,7 @@ static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler) } /* This runs while still in the direct mapping */ -static void startup_64_load_idt(unsigned long physbase) +static void __head startup_64_load_idt(unsigned long physbase) { struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase); gate_desc *idt = fixup_pointer(bringup_idt_table, physbase); -- GitLab From e6489cc45fa711e4522c1129e9a43ed44b7aaa96 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Fri, 19 Apr 2024 10:11:26 +0200 Subject: [PATCH 1778/2290] x86/head/64: Move the __head definition to [ Commit d2a285d65bfde3218fd0c3b88794d0135ced680b upstream ] Move the __head section definition to a header to widen its use. An upcoming patch will mark the code as __head in mem_encrypt_identity.c too. Signed-off-by: Hou Wenlong Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/0583f57977be184689c373fe540cbd7d85ca2047.1697525407.git.houwenlong.hwl@antgroup.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/init.h | 2 ++ arch/x86/kernel/head64.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5f1d3c421f686..cc9ccf61b6bd1 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H +#define __head __section(".head.text") + struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void *context; /* context for alloc_pgt_page */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 78f3f67565384..4fae511b2e2b2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Manage page tables very early on. @@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = { .address = 0, }; -#define __head __section(".head.text") - static void __head *fixup_pointer(void *ptr, unsigned long physaddr) { return ptr - (void *)_text + (void *)physaddr; -- GitLab From d6f5bc5ff04ff2e282ec5be05a0cde2091e4e222 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:27 +0200 Subject: [PATCH 1779/2290] x86/sme: Move early SME kernel encryption handling into .head.text [ Commit 48204aba801f1b512b3abed10b8e1a63e03f3dd1 upstream ] The .head.text section is the initial primary entrypoint of the core kernel, and is entered with the CPU executing from a 1:1 mapping of memory. Such code must never access global variables using absolute references, as these are based on the kernel virtual mapping which is not active yet at this point. Given that the SME startup code is also called from this early execution context, move it into .head.text as well. This will allow more thorough build time checks in the future to ensure that early startup code only uses RIP-relative references to global variables. Also replace some occurrences of __pa_symbol() [which relies on the compiler generating an absolute reference, which is not guaranteed] and an open coded RIP-relative access with RIP_REL_REF(). Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Lendacky Link: https://lore.kernel.org/r/20240227151907.387873-18-ardb+git@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mem_encrypt.h | 8 +++--- arch/x86/mm/mem_encrypt_identity.c | 42 ++++++++++++------------------ 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 41d06822bc8cd..853f423b1d138 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); void __init sev_setup_arch(void); -void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); @@ -80,8 +80,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } static inline void __init sev_setup_arch(void) { } -static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { } static inline void sev_es_init_vc_handling(void) { } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index f7dfc3f89921f..f176098848749 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -98,7 +99,7 @@ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; -static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; @@ -113,7 +114,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) memset(pgd_p, 0, pgd_size); } -static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd; p4d_t *p4d; @@ -150,7 +151,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) return pud; } -static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -166,7 +167,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); } -static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -192,7 +193,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); } -static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); @@ -202,7 +203,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd(ppd); @@ -212,7 +213,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, +static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, pmdval_t pmd_flags, pteval_t pte_flags) { unsigned long vaddr_end; @@ -236,22 +237,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, __sme_map_range_pte(ppd); } -static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } -static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); } -static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } -static unsigned long __init sme_pgtable_calc(unsigned long len) +static unsigned long __head sme_pgtable_calc(unsigned long len) { unsigned long entries = 0, tables = 0; @@ -288,7 +289,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return entries + tables; } -void __init sme_encrypt_kernel(struct boot_params *bp) +void __head sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; @@ -323,9 +324,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * memory from being cached. */ - /* Physical addresses gives us the identity mapped virtual addresses */ - kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); + kernel_start = (unsigned long)RIP_REL_REF(_text); + kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -342,14 +342,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp) } #endif - /* - * We're running identity mapped, so we must obtain the address to the - * SME encryption workarea using rip-relative addressing. - */ - asm ("lea sme_workarea(%%rip), %0" - : "=r" (workarea_start) - : "p" (sme_workarea)); - /* * Calculate required number of workarea bytes needed: * executable encryption area size: @@ -359,7 +351,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ - execute_start = workarea_start; + execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; @@ -502,7 +494,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) native_write_cr3(__native_read_cr3()); } -void __init sme_enable(struct boot_params *bp) +void __head sme_enable(struct boot_params *bp) { const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; -- GitLab From f56faf87c1e9008e5d5a58c96b89db20934c5473 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:28 +0200 Subject: [PATCH 1780/2290] x86/sev: Move early startup code into .head.text section [ Commit 428080c9b19bfda37c478cd626dbd3851db1aff9 upstream ] In preparation for implementing rigorous build time checks to enforce that only code that can support it will be called from the early 1:1 mapping of memory, move SEV init code that is called in this manner to the .head.text section. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Lendacky Link: https://lore.kernel.org/r/20240227151907.387873-19-ardb+git@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/sev.c | 3 +++ arch/x86/include/asm/sev.h | 10 +++++----- arch/x86/kernel/sev-shared.c | 23 ++++++++++------------- arch/x86/kernel/sev.c | 11 ++++++----- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index d07e665bb265b..3c5d5c97f8f73 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -118,6 +118,9 @@ static bool fault_in_kernel_space(unsigned long address) #define __init #define __pa(x) ((unsigned long)(x)) +#undef __head +#define __head + #define __BOOT_COMPRESSED /* Basic instruction decoding support needed */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index c57dd21155bd7..bcac2e53d50bb 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -192,15 +192,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) struct snp_guest_request_ioctl; void setup_ghcb(void); -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned long npages); -void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned long npages); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void __init __noreturn snp_abort(void); +void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); u64 snp_get_unsupported_features(u64 status); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 271e70d5748ef..3fe76bf17d95e 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -86,7 +86,8 @@ static bool __init sev_es_check_cpu_features(void) return true; } -static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) +static void __head __noreturn +sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -323,13 +324,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid */ static const struct snp_cpuid_table *snp_cpuid_get_table(void) { - void *ptr; - - asm ("lea cpuid_table_copy(%%rip), %0" - : "=r" (ptr) - : "p" (&cpuid_table_copy)); - - return ptr; + return &RIP_REL_REF(cpuid_table_copy); } /* @@ -388,7 +383,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; } -static bool +static bool __head snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -525,7 +520,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static int __head +snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -567,7 +563,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -1013,7 +1009,8 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +static __head +struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; struct setup_data *hdr; @@ -1040,7 +1037,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index e35fcc8d4bae4..f8a8249ae1177 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -690,7 +691,7 @@ static void pvalidate_pages(unsigned long vaddr, unsigned long npages, bool vali } } -static void __init early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op) +static void __head early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; @@ -728,7 +729,7 @@ e_term: sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); } -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { /* @@ -2085,7 +2086,7 @@ fail: * * Scan for the blob in that order. */ -static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2111,7 +2112,7 @@ found_cc_info: return cc_info; } -bool __init snp_init(struct boot_params *bp) +bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2133,7 +2134,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init __noreturn snp_abort(void) +void __head __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } -- GitLab From 408a43b6c948421fea5e8527623f912f19179e59 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:29 +0200 Subject: [PATCH 1781/2290] x86/efistub: Remap kernel text read-only before dropping NX attribute [ Commit 9c55461040a9264b7e44444c53d26480b438eda6 upstream ] Currently, the EFI stub invokes the EFI memory attributes protocol to strip any NX restrictions from the entire loaded kernel, resulting in all code and data being mapped read-write-execute. The point of the EFI memory attributes protocol is to remove the need for all memory allocations to be mapped with both write and execute permissions by default, and make it the OS loader's responsibility to transition data mappings to code mappings where appropriate. Even though the UEFI specification does not appear to leave room for denying memory attribute changes based on security policy, let's be cautious and avoid relying on the ability to create read-write-execute mappings. This is trivially achievable, given that the amount of kernel code executing via the firmware's 1:1 mapping is rather small and limited to the .head.text region. So let's drop the NX restrictions only on that subregion, but not before remapping it as read-only first. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 1 + arch/x86/include/asm/boot.h | 1 + drivers/firmware/efi/libstub/x86-stub.c | 11 ++++++++++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3965b2c9efee0..6e61baff223f8 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 8ae7893d712ff..45435ff883635 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,7 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_text_size = VO___start_rodata - VO__text; const unsigned long kernel_total_size = VO__end - VO__text; static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index a38cc0afc90a0..a3e0be0470a40 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -81,6 +81,7 @@ #ifndef __ASSEMBLY__ extern unsigned int output_len; +extern const unsigned long kernel_text_size; extern const unsigned long kernel_total_size; unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 1f5edcb6339ae..55468debd55d0 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -227,6 +227,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start, rounded_end = roundup(start + size, EFI_PAGE_SIZE); if (memattr != NULL) { + status = efi_call_proto(memattr, set_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_RO); + if (status != EFI_SUCCESS) { + efi_warn("Failed to set EFI_MEMORY_RO attribute\n"); + return status; + } + status = efi_call_proto(memattr, clear_memory_attributes, rounded_start, rounded_end - rounded_start, @@ -778,7 +787,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) *kernel_entry = addr + entry; - return efi_adjust_memory_range_protection(addr, kernel_total_size); + return efi_adjust_memory_range_protection(addr, kernel_text_size); } static void __noreturn enter_kernel(unsigned long kernel_addr, -- GitLab From 8d56bad42ac4c43c6c72ddd6a654a2628bf839c5 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 7 Apr 2024 14:56:04 +0800 Subject: [PATCH 1782/2290] netfilter: nf_tables: Fix potential data-race in __nft_expr_type_get() [ Upstream commit f969eb84ce482331a991079ab7a5c4dc3b7f89bf ] nft_unregister_expr() can concurrent with __nft_expr_type_get(), and there is not any protection when iterate over nf_tables_expressions list in __nft_expr_type_get(). Therefore, there is potential data-race of nf_tables_expressions list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_expressions list in __nft_expr_type_get(), and use rcu_read_lock() in the caller nft_expr_type_get() to protect the entire type query process. Fixes: ef1f7df9170d ("netfilter: nf_tables: expression ops overloading") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8152a69d82681..ba63866914f18 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2891,7 +2891,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -2923,9 +2923,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES -- GitLab From df7c0fb8c2b9f9cac65659332581b19682a71349 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 7 Apr 2024 14:56:05 +0800 Subject: [PATCH 1783/2290] netfilter: nf_tables: Fix potential data-race in __nft_obj_type_get() [ Upstream commit d78d867dcea69c328db30df665be5be7d0148484 ] nft_unregister_obj() can concurrent with __nft_obj_type_get(), and there is not any protection when iterate over nf_tables_objects list in __nft_obj_type_get(). Therefore, there is potential data-race of nf_tables_objects list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_objects list in __nft_obj_type_get(), and use rcu_read_lock() in the caller nft_obj_type_get() to protect the entire type query process. Fixes: e50092404c1b ("netfilter: nf_tables: add stateful objects") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ba63866914f18..1c4b7a8ec2cc6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7175,7 +7175,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; - list_for_each_entry(type, &nf_tables_objects, list) { + list_for_each_entry_rcu(type, &nf_tables_objects, list) { if (type->family != NFPROTO_UNSPEC && type->family != family) continue; @@ -7191,9 +7191,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; + rcu_read_lock(); type = __nft_obj_type_get(objtype, family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES -- GitLab From b13db0d16bc7b2a52abcf5cb71334f63faa5dbd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Apr 2024 11:24:59 +0200 Subject: [PATCH 1784/2290] netfilter: br_netfilter: skip conntrack input hook for promisc packets [ Upstream commit 751de2012eafa4d46d8081056761fa0e9cc8a178 ] For historical reasons, when bridge device is in promisc mode, packets that are directed to the taps follow bridge input hook path. This patch adds a workaround to reset conntrack for these packets. Jianbo Liu reports warning splats in their test infrastructure where cloned packets reach the br_netfilter input hook to confirm the conntrack object. Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has reached the input hook because it is passed up to the bridge device to reach the taps. [ 57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core [ 57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19 [ 57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1 [ 57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202 [ 57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000 [ 57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000 [ 57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003 [ 57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000 [ 57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800 [ 57.582313] FS: 0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000 [ 57.583040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 57.583564] CR2: 000000c4206aa000 CR3: 0000000103847001 CR4: 0000000000370eb0 [ 57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 57.585440] Call Trace: [ 57.585721] [ 57.585976] ? __warn+0x7d/0x130 [ 57.586323] ? br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.586811] ? report_bug+0xf1/0x1c0 [ 57.587177] ? handle_bug+0x3f/0x70 [ 57.587539] ? exc_invalid_op+0x13/0x60 [ 57.587929] ? asm_exc_invalid_op+0x16/0x20 [ 57.588336] ? br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.588825] nf_hook_slow+0x3d/0xd0 [ 57.589188] ? br_handle_vlan+0x4b/0x110 [ 57.589579] br_pass_frame_up+0xfc/0x150 [ 57.589970] ? br_port_flags_change+0x40/0x40 [ 57.590396] br_handle_frame_finish+0x346/0x5e0 [ 57.590837] ? ipt_do_table+0x32e/0x430 [ 57.591221] ? br_handle_local_finish+0x20/0x20 [ 57.591656] br_nf_hook_thresh+0x4b/0xf0 [br_netfilter] [ 57.592286] ? br_handle_local_finish+0x20/0x20 [ 57.592802] br_nf_pre_routing_finish+0x178/0x480 [br_netfilter] [ 57.593348] ? br_handle_local_finish+0x20/0x20 [ 57.593782] ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat] [ 57.594279] br_nf_pre_routing+0x24c/0x550 [br_netfilter] [ 57.594780] ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter] [ 57.595280] br_handle_frame+0x1f3/0x3d0 [ 57.595676] ? br_handle_local_finish+0x20/0x20 [ 57.596118] ? br_handle_frame_finish+0x5e0/0x5e0 [ 57.596566] __netif_receive_skb_core+0x25b/0xfc0 [ 57.597017] ? __napi_build_skb+0x37/0x40 [ 57.597418] __netif_receive_skb_list_core+0xfb/0x220 Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") Reported-by: Jianbo Liu Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/br_input.c | 15 +++++++++++---- net/bridge/br_netfilter_hooks.c | 6 ++++++ net/bridge/br_private.h | 1 + net/bridge/netfilter/nf_conntrack_bridge.c | 14 ++++++++++---- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6bb272894c960..b94a1783902ea 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) return netif_receive_skb(skb); } -static int br_pass_frame_up(struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb, bool promisc) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); @@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), BR_MCAST_DIR_TX); + BR_INPUT_SKB_CB(skb)->promisc = promisc; + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, br_netif_receive_skb); @@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb struct net_bridge_mcast *brmctx; struct net_bridge_vlan *vlan; struct net_bridge *br; + bool promisc; u16 vid = 0; u8 state; @@ -120,7 +123,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); - local_rcv = !!(br->dev->flags & IFF_PROMISC); + promisc = !!(br->dev->flags & IFF_PROMISC); + local_rcv = promisc; + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { /* by definition the broadcast is also a multicast address */ if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { @@ -183,7 +188,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb unsigned long now = jiffies; if (test_bit(BR_FDB_LOCAL, &dst->flags)) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, false); if (now != dst->used) dst->used = now; @@ -196,7 +201,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } if (local_rcv) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, promisc); out: return 0; @@ -368,6 +373,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } + BR_INPUT_SKB_CB(skb)->promisc = false; + /* The else clause should be hit when nf_hook(): * - returns < 0 (drop/error) * - returns = 0 (stolen/nf_queue) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index bff48d5763635..9ac70c27da835 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; struct nf_conntrack *nfct = skb_nfct(skb); const struct nf_ct_hook *ct_hook; struct nf_conn *ct; int ret; + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 51d010f64e066..940de95167689 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -559,6 +559,7 @@ struct br_input_skb_cb { #endif u8 proxyarp_replied:1; u8 src_port_isolated:1; + u8 promisc:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index c7c27ada67044..e60c38670f220 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - enum ip_conntrack_info ctinfo; + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; + struct nf_conntrack *nfct = skb_nfct(skb); struct nf_conn *ct; - if (skb->pkt_type == PACKET_HOST) + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; /* nf_conntrack_confirm() cannot handle concurrent clones, * this happens for broad/multicast frames with e.g. macvlan on top * of the bridge device. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + ct = container_of(nfct, struct nf_conn, ct_general); + if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) return NF_ACCEPT; /* let inet prerouting call conntrack again */ -- GitLab From 41d8fdf3afaff312e17466e4ab732937738d5644 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Apr 2024 21:05:13 +0200 Subject: [PATCH 1785/2290] netfilter: nft_set_pipapo: do not free live element [ Upstream commit 3cfc9ec039af60dbd8965ae085b2c2ccdcfbe1cc ] Pablo reports a crash with large batches of elements with a back-to-back add/remove pattern. Quoting Pablo: add_elem("00000000") timeout 100 ms ... add_elem("0000000X") timeout 100 ms del_elem("0000000X") <---------------- delete one that was just added ... add_elem("00005000") timeout 100 ms 1) nft_pipapo_remove() removes element 0000000X Then, KASAN shows a splat. Looking at the remove function there is a chance that we will drop a rule that maps to a non-deactivated element. Removal happens in two steps, first we do a lookup for key k and return the to-be-removed element and mark it as inactive in the next generation. Then, in a second step, the element gets removed from the set/map. The _remove function does not work correctly if we have more than one element that share the same key. This can happen if we insert an element into a set when the set already holds an element with same key, but the element mapping to the existing key has timed out or is not active in the next generation. In such case its possible that removal will unmap the wrong element. If this happens, we will leak the non-deactivated element, it becomes unreachable. The element that got deactivated (and will be freed later) will remain reachable in the set data structure, this can result in a crash when such an element is retrieved during lookup (stale pointer). Add a check that the fully matching key does in fact map to the element that we have marked as inactive in the deactivation step. If not, we need to continue searching. Add a bug/warn trap at the end of the function as well, the remove function must not ever be called with an invisible/unreachable/non-existent element. v2: avoid uneeded temporary variable (Stefano) Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Pablo Neira Ayuso Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 58eca26162735..2299ced939c47 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1994,6 +1994,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = rules_f0; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; + if (!pipapo_match_field(f, start, rules_fx, match_start, match_end)) break; @@ -2006,16 +2008,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); - } - if (i == m->field_count) { - priv->dirty = true; - pipapo_drop(m, rulemap); - return; + if (last && f->mt[rulemap[i].to].e == e) { + priv->dirty = true; + pipapo_drop(m, rulemap); + return; + } } first_rule += rules_f0; } + + WARN_ON_ONCE(1); /* elem_priv not found */ } /** -- GitLab From 8bf7c76a2a207ca2b4cfda0a279192adf27678d7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Apr 2024 13:47:33 +0200 Subject: [PATCH 1786/2290] netfilter: flowtable: validate pppoe header [ Upstream commit 87b3593bed1868b2d9fe096c01bcdf0ea86cbebf ] Ensure there is sufficient room to access the protocol field of the PPPoe header. Validate it once before the flowtable lookup, then use a helper function to access protocol field. Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 12 +++++++++++- net/netfilter/nf_flow_table_inet.c | 3 ++- net/netfilter/nf_flow_table_ip.c | 8 +++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 4a767b3d20b9d..df7775afb92b9 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -335,7 +335,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) { __be16 proto; @@ -351,6 +351,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) return 0; } +static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) +{ + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + return false; + + *inner_proto = __nf_flow_pppoe_proto(skb); + + return true; +} + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 9505f9d188ff2..6eef15648b7b0 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, proto = veth->h_vlan_encapsulated_proto; break; case htons(ETH_P_PPP_SES): - proto = nf_flow_pppoe_proto(skb); + if (!nf_flow_pppoe_proto(skb, &proto)) + return NF_ACCEPT; break; default: proto = skb->protocol; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 6feaac9ab05c8..306e1ba6012e2 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -267,10 +267,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, +static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, u32 *offset) { struct vlan_ethhdr *veth; + __be16 inner_proto; switch (skb->protocol) { case htons(ETH_P_8021Q): @@ -281,7 +282,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, } break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_proto(skb) == proto) { + if (nf_flow_pppoe_proto(skb, &inner_proto) && + inner_proto == proto) { *offset += PPPOE_SES_HLEN; return true; } @@ -310,7 +312,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): - skb->protocol = nf_flow_pppoe_proto(skb); + skb->protocol = __nf_flow_pppoe_proto(skb); skb_pull(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; -- GitLab From f1c3c61701a0b12f4906152c1626a5de580ea3d2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 Apr 2024 00:09:00 +0200 Subject: [PATCH 1787/2290] netfilter: flowtable: incorrect pppoe tuple [ Upstream commit 6db5dc7b351b9569940cd1cf445e237c42cd6d27 ] pppoe traffic reaching ingress path does not match the flowtable entry because the pppoe header is expected to be at the network header offset. This bug causes a mismatch in the flow table lookup, so pppoe packets enter the classical forwarding path. Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 306e1ba6012e2..22bc0e3d8a0b5 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -156,7 +156,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, tuple->encap[i].proto = skb->protocol; break; case htons(ETH_P_PPP_SES): - phdr = (struct pppoe_hdr *)skb_mac_header(skb); + phdr = (struct pppoe_hdr *)skb_network_header(skb); tuple->encap[i].id = ntohs(phdr->sid); tuple->encap[i].proto = skb->protocol; break; -- GitLab From a1d3e3521f02cac2a215c19c3294a2ce72b69fd4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 10 Apr 2024 10:10:15 -0700 Subject: [PATCH 1788/2290] af_unix: Call manage_oob() for every skb in unix_stream_read_generic(). [ Upstream commit 283454c8a123072e5c386a5a2b5fc576aa455b6f ] When we call recv() for AF_UNIX socket, we first peek one skb and calls manage_oob() to check if the skb is sent with MSG_OOB. However, when we fetch the next (and the following) skb, manage_oob() is not called now, leading a wrong behaviour. Let's say a socket send()s "hello" with MSG_OOB and the peer tries to recv() 5 bytes with MSG_PEEK. Here, we should get only "hell" without 'o', but actually not: >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'hello', MSG_OOB) 5 >>> c2.recv(5, MSG_PEEK) b'hello' The first skb fills 4 bytes, and the next skb is peeked but not properly checked by manage_oob(). Let's move up the again label to call manage_oob() for evry skb. With this patch: >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'hello', MSG_OOB) 5 >>> c2.recv(5, MSG_PEEK) b'hell' Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240410171016.7621-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0a75d76535f75..6af6f82e89464 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2753,6 +2753,7 @@ redo: last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; +again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); @@ -2764,7 +2765,6 @@ redo: } } #endif -again: if (skb == NULL) { if (copied >= target) goto unlock; -- GitLab From ba0db4638525b8b054b3d546b45f7e473f477027 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 10 Apr 2024 10:10:16 -0700 Subject: [PATCH 1789/2290] af_unix: Don't peek OOB data without MSG_OOB. [ Upstream commit 22dd70eb2c3d754862964377a75abafd3167346b ] Currently, we can read OOB data without MSG_OOB by using MSG_PEEK when OOB data is sitting on the front row, which is apparently wrong. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'a', MSG_OOB) 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'a' If manage_oob() is called when no data has been copied, we only check if the socket enables SO_OOBINLINE or MSG_PEEK is not used. Otherwise, the skb is returned as is. However, here we should return NULL if MSG_PEEK is set and no data has been copied. Also, in such a case, we should not jump to the redo label because we will be caught in the loop and hog the CPU until normal data comes in. Then, we need to handle skb == NULL case with the if-clause below the manage_oob() block. With this patch: >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'a', MSG_OOB) 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) Traceback (most recent call last): File "", line 1, in BlockingIOError: [Errno 11] Resource temporarily unavailable Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240410171016.7621-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6af6f82e89464..f28e2956fea58 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2675,7 +2675,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } - } else if (!(flags & MSG_PEEK)) { + } else if (flags & MSG_PEEK) { + skb = NULL; + } else { skb_unlink(skb, &sk->sk_receive_queue); WRITE_ONCE(u->oob_skb, NULL); if (!WARN_ON_ONCE(skb_unref(skb))) @@ -2757,11 +2759,9 @@ again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); - if (!skb) { + if (!skb && copied) { unix_state_unlock(sk); - if (copied) - break; - goto redo; + break; } } #endif -- GitLab From 8635ac7dd9cf033df69d52ecc4d2cf10aea7c98b Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 11 Apr 2024 14:54:39 +0300 Subject: [PATCH 1790/2290] net/mlx5: Lag, restore buckets number to default after hash LAG deactivation [ Upstream commit 37cc10da3a50e6d0cb9808a90b7da9b4868794dd ] The cited patch introduces the concept of buckets in LAG in hash mode. However, the patch doesn't clear the number of buckets in the LAG deactivation. This results in using the wrong number of buckets in case user create a hash mode LAG and afterwards create a non-hash mode LAG. Hence, restore buckets number to default after hash mode LAG deactivation. Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode") Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240411115444.374475-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index ad32b80e85018..01c0e1ee918d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -679,8 +679,10 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) return err; } - if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { mlx5_lag_port_sel_destroy(ldev); + ldev->buckets = 1; + } if (mlx5_lag_has_drop_rule(ldev)) mlx5_lag_drop_rule_cleanup(ldev); -- GitLab From 46efa4d5930cf3c2af8c01f75e0a47e4fc045e3b Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Apr 2024 14:54:44 +0300 Subject: [PATCH 1791/2290] net/mlx5e: Prevent deadlock while disabling aRFS [ Upstream commit fef965764cf562f28afb997b626fc7c3cec99693 ] When disabling aRFS under the `priv->state_lock`, any scheduled aRFS works are canceled using the `cancel_work_sync` function, which waits for the work to end if it has already started. However, while waiting for the work handler, the handler will try to acquire the `state_lock` which is already acquired. The worker acquires the lock to delete the rules if the state is down, which is not the worker's responsibility since disabling aRFS deletes the rules. Add an aRFS state variable, which indicates whether the aRFS is enabled and prevent adding rules when the aRFS is disabled. Kernel log: ====================================================== WARNING: possible circular locking dependency detected 6.7.0-rc4_net_next_mlx5_5483eb2 #1 Tainted: G I ------------------------------------------------------ ethtool/386089 is trying to acquire lock: ffff88810f21ce68 ((work_completion)(&rule->arfs_work)){+.+.}-{0:0}, at: __flush_work+0x74/0x4e0 but task is already holding lock: ffff8884a1808cc0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_ethtool_set_channels+0x53/0x200 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&priv->state_lock){+.+.}-{3:3}: __mutex_lock+0x80/0xc90 arfs_handle_work+0x4b/0x3b0 [mlx5_core] process_one_work+0x1dc/0x4a0 worker_thread+0x1bf/0x3c0 kthread+0xd7/0x100 ret_from_fork+0x2d/0x50 ret_from_fork_asm+0x11/0x20 -> #0 ((work_completion)(&rule->arfs_work)){+.+.}-{0:0}: __lock_acquire+0x17b4/0x2c80 lock_acquire+0xd0/0x2b0 __flush_work+0x7a/0x4e0 __cancel_work_timer+0x131/0x1c0 arfs_del_rules+0x143/0x1e0 [mlx5_core] mlx5e_arfs_disable+0x1b/0x30 [mlx5_core] mlx5e_ethtool_set_channels+0xcb/0x200 [mlx5_core] ethnl_set_channels+0x28f/0x3b0 ethnl_default_set_doit+0xec/0x240 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x188/0x2c0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1a1/0x270 netlink_sendmsg+0x214/0x460 __sock_sendmsg+0x38/0x60 __sys_sendto+0x113/0x170 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x46/0x4e other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&priv->state_lock); lock((work_completion)(&rule->arfs_work)); lock(&priv->state_lock); lock((work_completion)(&rule->arfs_work)); *** DEADLOCK *** 3 locks held by ethtool/386089: #0: ffffffff82ea7210 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 #1: ffffffff82e94c88 (rtnl_mutex){+.+.}-{3:3}, at: ethnl_default_set_doit+0xd3/0x240 #2: ffff8884a1808cc0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_ethtool_set_channels+0x53/0x200 [mlx5_core] stack backtrace: CPU: 15 PID: 386089 Comm: ethtool Tainted: G I 6.7.0-rc4_net_next_mlx5_5483eb2 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x60/0xa0 check_noncircular+0x144/0x160 __lock_acquire+0x17b4/0x2c80 lock_acquire+0xd0/0x2b0 ? __flush_work+0x74/0x4e0 ? save_trace+0x3e/0x360 ? __flush_work+0x74/0x4e0 __flush_work+0x7a/0x4e0 ? __flush_work+0x74/0x4e0 ? __lock_acquire+0xa78/0x2c80 ? lock_acquire+0xd0/0x2b0 ? mark_held_locks+0x49/0x70 __cancel_work_timer+0x131/0x1c0 ? mark_held_locks+0x49/0x70 arfs_del_rules+0x143/0x1e0 [mlx5_core] mlx5e_arfs_disable+0x1b/0x30 [mlx5_core] mlx5e_ethtool_set_channels+0xcb/0x200 [mlx5_core] ethnl_set_channels+0x28f/0x3b0 ethnl_default_set_doit+0xec/0x240 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x188/0x2c0 ? ethnl_ops_begin+0xb0/0xb0 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1a1/0x270 netlink_sendmsg+0x214/0x460 __sock_sendmsg+0x38/0x60 __sys_sendto+0x113/0x170 ? do_user_addr_fault+0x53f/0x8f0 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x46/0x4e Fixes: 45bf454ae884 ("net/mlx5e: Enabling aRFS mechanism") Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240411115444.374475-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 58eacba6de8cd..ad51edf553185 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -45,6 +45,10 @@ struct arfs_table { struct hlist_head rules_hash[ARFS_HASH_SIZE]; }; +enum { + MLX5E_ARFS_STATE_ENABLED, +}; + enum arfs_type { ARFS_IPV4_TCP, ARFS_IPV6_TCP, @@ -60,6 +64,7 @@ struct mlx5e_arfs_tables { struct list_head rules; int last_filter_id; struct workqueue_struct *wq; + unsigned long state; }; struct arfs_tuple { @@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) return err; } } + set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + return 0; } @@ -454,6 +461,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs) int i; int j; + clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + spin_lock_bh(&arfs->arfs_lock); mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); @@ -621,17 +630,8 @@ static void arfs_handle_work(struct work_struct *work) struct mlx5_flow_handle *rule; arfs = mlx5e_fs_get_arfs(priv->fs); - mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - spin_lock_bh(&arfs->arfs_lock); - hlist_del(&arfs_rule->hlist); - spin_unlock_bh(&arfs->arfs_lock); - - mutex_unlock(&priv->state_lock); - kfree(arfs_rule); - goto out; - } - mutex_unlock(&priv->state_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) + return; if (!arfs_rule->rule) { rule = arfs_add_rule(priv, arfs_rule); @@ -744,6 +744,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) { + spin_unlock_bh(&arfs->arfs_lock); + return -EPERM; + } + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index) { -- GitLab From 6a6ebec40820230a5806cafa6ad500cd543ca29e Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 15 Mar 2024 12:08:21 +0100 Subject: [PATCH 1792/2290] ice: tc: allow zero flags in parsing tc flower [ Upstream commit 73278715725a8347032acf233082ca4eb31e6a56 ] The check for flags is done to not pass empty lookups to adding switch rule functions. Since metadata is always added to lookups there is no need to check against the flag. It is also fixing the problem with such rule: $ tc filter add dev gtp_dev ingress protocol ip prio 0 flower \ enc_dst_port 2123 action drop Switch block in case of GTP can't parse the destination port, because it should always be set to GTP specific value. The same with ethertype. The result is that there is no other matching criteria than GTP tunnel. In this case flags is 0, rule can't be added only because of defensive check against flags. Fixes: 9a225f81f540 ("ice: Support GTP-U and GTP-C offload in switchdev") Reviewed-by: Wojciech Drewek Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 652ef09eeb305..ec6628aacc13b 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -663,7 +663,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) int ret; int i; - if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) { + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); return -EOPNOTSUPP; } -- GitLab From 62e27ef18eb4f0d33bbae8e9ef56b99696a74713 Mon Sep 17 00:00:00 2001 From: Lei Chen Date: Sun, 14 Apr 2024 22:02:46 -0400 Subject: [PATCH 1793/2290] tun: limit printing rate when illegal packet received by tun dev [ Upstream commit f8bbc07ac535593139c875ffa19af924b1084540 ] vhost_worker will call tun call backs to receive packets. If too many illegal packets arrives, tun_do_read will keep dumping packet contents. When console is enabled, it will costs much more cpu time to dump packet and soft lockup will be detected. net_ratelimit mechanism can be used to limit the dumping rate. PID: 33036 TASK: ffff949da6f20000 CPU: 23 COMMAND: "vhost-32980" #0 [fffffe00003fce50] crash_nmi_callback at ffffffff89249253 #1 [fffffe00003fce58] nmi_handle at ffffffff89225fa3 #2 [fffffe00003fceb0] default_do_nmi at ffffffff8922642e #3 [fffffe00003fced0] do_nmi at ffffffff8922660d #4 [fffffe00003fcef0] end_repeat_nmi at ffffffff89c01663 [exception RIP: io_serial_in+20] RIP: ffffffff89792594 RSP: ffffa655314979e8 RFLAGS: 00000002 RAX: ffffffff89792500 RBX: ffffffff8af428a0 RCX: 0000000000000000 RDX: 00000000000003fd RSI: 0000000000000005 RDI: ffffffff8af428a0 RBP: 0000000000002710 R8: 0000000000000004 R9: 000000000000000f R10: 0000000000000000 R11: ffffffff8acbf64f R12: 0000000000000020 R13: ffffffff8acbf698 R14: 0000000000000058 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #5 [ffffa655314979e8] io_serial_in at ffffffff89792594 #6 [ffffa655314979e8] wait_for_xmitr at ffffffff89793470 #7 [ffffa65531497a08] serial8250_console_putchar at ffffffff897934f6 #8 [ffffa65531497a20] uart_console_write at ffffffff8978b605 #9 [ffffa65531497a48] serial8250_console_write at ffffffff89796558 #10 [ffffa65531497ac8] console_unlock at ffffffff89316124 #11 [ffffa65531497b10] vprintk_emit at ffffffff89317c07 #12 [ffffa65531497b68] printk at ffffffff89318306 #13 [ffffa65531497bc8] print_hex_dump at ffffffff89650765 #14 [ffffa65531497ca8] tun_do_read at ffffffffc0b06c27 [tun] #15 [ffffa65531497d38] tun_recvmsg at ffffffffc0b06e34 [tun] #16 [ffffa65531497d68] handle_rx at ffffffffc0c5d682 [vhost_net] #17 [ffffa65531497ed0] vhost_worker at ffffffffc0c644dc [vhost] #18 [ffffa65531497f10] kthread at ffffffff892d2e72 #19 [ffffa65531497f50] ret_from_fork at ffffffff89c0022f Fixes: ef3db4a59542 ("tun: avoid BUG, dump packet on GSO errors") Signed-off-by: Lei Chen Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Reviewed-by: Eric Dumazet Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20240415020247.2207781-1-lei.chen@smartx.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/tun.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 922d6f16d99d1..4af1ba5d074c0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2121,14 +2121,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, tun_is_little_endian(tun), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb); - pr_err("unexpected GSO type: " - "0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + + if (net_ratelimit()) { + netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), + tun16_to_cpu(tun, gso.hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + } WARN_ON_ONCE(1); return -EINVAL; } -- GitLab From e86c9db58eba290e858e2bb80efcde9e3973a5ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 13 Apr 2024 16:01:39 +0300 Subject: [PATCH 1794/2290] net: dsa: mt7530: fix mirroring frames received on local port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d59cf049c8378677053703e724808836f180888e ] This switch intellectual property provides a bit on the ARL global control register which controls allowing mirroring frames which are received on the local port (monitor port). This bit is unset after reset. This ability must be enabled to fully support the port mirroring feature on this switch intellectual property. Therefore, this patch fixes the traffic not being reflected on a port, which would be configured like below: tc qdisc add dev swp0 clsact tc filter add dev swp0 ingress matchall skip_sw \ action mirred egress mirror dev swp0 As a side note, this configuration provides the hairpinning feature for a single port. Fixes: 37feab6076aa ("net: dsa: mt7530: add support for port mirroring") Signed-off-by: Arınç ÜNAL Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 6 ++++++ drivers/net/dsa/mt7530.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d4515c19a5f34..b5f61a9a378eb 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2461,6 +2461,9 @@ mt7530_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Setup VLAN ID 0 for VLAN-unaware bridges */ ret = mt7530_setup_vlan0(priv); if (ret) @@ -2577,6 +2580,9 @@ mt7531_setup_common(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Flush the FDB table */ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); if (ret < 0) diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 2d1ea390f05ab..af18f47f22141 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -31,6 +31,10 @@ enum mt753x_id { #define SYSC_REG_RSTCTRL 0x34 #define RESET_MCM BIT(2) +/* Register for ARL global control */ +#define MT753X_AGC 0xc +#define LOCAL_EN BIT(7) + /* Registers to mac forward control for unknown frames */ #define MT7530_MFC 0x10 #define BC_FFP(x) (((x) & 0xff) << 24) -- GitLab From 45e811bab2d088a43fb5199e4cc2900d64363df7 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 17 Apr 2024 15:24:25 +0530 Subject: [PATCH 1795/2290] net: ethernet: ti: am65-cpsw-nuss: cleanup DMA Channels before using them [ Upstream commit c24cd679b075b0e953ea167b0aa2b2d59e4eba7f ] The TX and RX DMA Channels used by the driver to exchange data with CPSW are not guaranteed to be in a clean state during driver initialization. The Bootloader could have used the same DMA Channels without cleaning them up in the event of failure. Thus, reset and disable the DMA Channels to ensure that they are in a clean state before using them. Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Reported-by: Schuyler Patton Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20240417095425.2253876-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 76fabeae512db..33df06a2de13a 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2549,6 +2549,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { + struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns; + struct am65_cpsw_tx_chn *tx_chan = common->tx_chns; struct device *dev = common->dev; struct devlink_port *dl_port; struct am65_cpsw_port *port; @@ -2567,6 +2569,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + /* The DMA Channels are not guaranteed to be in a clean state. + * Reset and disable them to ensure that they are back to the + * clean state and ready to be used. + */ + for (i = 0; i < common->tx_ch_num; i++) { + k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i], + am65_cpsw_nuss_tx_cleanup); + k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); + } + + for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) + k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, + am65_cpsw_nuss_rx_cleanup, !!i); + + k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); + ret = am65_cpsw_nuss_register_devlink(common); if (ret) return ret; -- GitLab From 196617d07dd66d7d53de5ce1ca934e8ed92b5d13 Mon Sep 17 00:00:00 2001 From: "Yanjun.Zhu" Date: Thu, 14 Mar 2024 07:51:40 +0100 Subject: [PATCH 1796/2290] RDMA/rxe: Fix the problem "mutex_destroy missing" [ Upstream commit 481047d7e8391d3842ae59025806531cdad710d9 ] When a mutex lock is not used any more, the function mutex_destroy should be called to mark the mutex lock uninitialized. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yanjun.Zhu Link: https://lore.kernel.org/r/20240314065140.27468-1-yanjun.zhu@linux.dev Reviewed-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 51daac5c4feb7..be3ddfbf3cae3 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -33,6 +33,8 @@ void rxe_dealloc(struct ib_device *ib_dev) if (rxe->tfm) crypto_free_shash(rxe->tfm); + + mutex_destroy(&rxe->usdev_lock); } /* initialize rxe device parameters */ -- GitLab From ea42dbe759921c02f70fb3d0e1e6c34e1779319a Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 22 Mar 2024 13:20:49 +0200 Subject: [PATCH 1797/2290] RDMA/cm: Print the old state when cm_destroy_id gets timeout [ Upstream commit b68e1acb5834ed1a2ad42d9d002815a8bae7c0b6 ] The old state is helpful for debugging, as the current state is always IB_CM_IDLE when timeout happens. Fixes: 96d9cbe2f2ff ("RDMA/cm: add timeout to cm_destroy_id wait") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/20240322112049.2022994-1-markzhang@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 462a10d6a5762..950fe205995b7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1026,23 +1026,26 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } -static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id) +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id, + enum ib_cm_state old_state) { struct cm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__, - cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount)); + pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, + cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; + enum ib_cm_state old_state; struct cm_work *work; int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); + old_state = cm_id->state; retest: switch (cm_id->state) { case IB_CM_LISTEN: @@ -1151,7 +1154,7 @@ retest: msecs_to_jiffies( CM_DESTROY_ID_WAIT_TIMEOUT)); if (!ret) /* timeout happened */ - cm_destroy_id_wait_timeout(cm_id); + cm_destroy_id_wait_timeout(cm_id, old_state); } while (!ret); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) -- GitLab From fe446927f8afc08a5f7d54a5fed84abec480cd1f Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 3 Apr 2024 12:03:46 +0300 Subject: [PATCH 1798/2290] RDMA/mlx5: Fix port number for counter query in multi-port configuration [ Upstream commit be121ffb384f53e966ee7299ffccc6eeb61bc73d ] Set the correct port when querying PPCNT in multi-port configuration. Distinguish between cases where switchdev mode was enabled to multi-port configuration and don't overwrite the queried port to 1 in multi-port case. Fixes: 74b30b3ad5ce ("RDMA/mlx5: Set local port to one when accessing counters") Signed-off-by: Michael Guralnik Link: https://lore.kernel.org/r/9bfcc8ade958b760a51408c3ad654a01b11f7d76.1712134988.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/mad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9c8a7b206dcf4..e61efed320f11 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -188,7 +188,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, mdev = dev->mdev; mdev_port_num = 1; } - if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { + if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 && + !mlx5_core_mp_enabled(mdev)) { /* set local port to one for Function-Per-Port HCA. */ mdev = dev->mdev; mdev_port_num = 1; -- GitLab From beb3ff19a55d78d3c70f9284458c9d155818ae27 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 10 Apr 2024 11:46:18 +0200 Subject: [PATCH 1799/2290] s390/qdio: handle deferred cc1 [ Upstream commit 607638faf2ff1cede37458111496e7cc6c977f6f ] A deferred condition code 1 response indicates that I/O was not started and should be retried. The current QDIO implementation handles a cc1 response as I/O error, resulting in a failed QDIO setup. This can happen for example when a path verification request arrives at the same time as QDIO setup I/O is started. Fix this by retrying the QDIO setup I/O when a cc1 response is received. Note that since commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") commit 5ef1dc40ffa6 ("s390/cio: fix invalid -EBUSY on ccw_device_start") deferred cc1 responses are much more likely to occur. See the commit message of the latter for more background information. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Reviewed-by: Alexandra Winter Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- drivers/s390/cio/qdio_main.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 9cde55730b65a..ebcb535809882 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -722,8 +722,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr, lgr_info_log(); } -static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, - int dstat) +static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, + int dstat, int dcc) { DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq"); @@ -731,15 +731,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, goto error; if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END)) goto error; + if (dcc == 1) + return -EAGAIN; if (!(dstat & DEV_STAT_DEV_END)) goto error; qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED); - return; + return 0; error: DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no); DBF_ERROR("ds: %2x cs:%2x", dstat, cstat); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + return -EIO; } /* qdio interrupt handler */ @@ -748,7 +751,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; - int cstat, dstat; + int cstat, dstat, rc, dcc; if (!intparm || !irq_ptr) { ccw_device_get_schid(cdev, &schid); @@ -768,10 +771,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, qdio_irq_check_sense(irq_ptr, irb); cstat = irb->scsw.cmd.cstat; dstat = irb->scsw.cmd.dstat; + dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0; + rc = 0; switch (irq_ptr->state) { case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(irq_ptr, cstat, dstat); + rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc); break; case QDIO_IRQ_STATE_CLEANUP: qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); @@ -785,12 +790,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, if (cstat || dstat) qdio_handle_activate_check(irq_ptr, intparm, cstat, dstat); + else if (dcc == 1) + rc = -EAGAIN; break; case QDIO_IRQ_STATE_STOPPED: break; default: WARN_ON_ONCE(1); } + + if (rc == -EAGAIN) { + DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry"); + rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0); + if (!rc) + return; + DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no); + DBF_ERROR("rc:%4x", rc); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + } + wake_up(&cdev->private->wait_q); } -- GitLab From 559f3a6333397ab6cd4a696edd65a70b6be62c6e Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 10 Apr 2024 11:46:19 +0200 Subject: [PATCH 1800/2290] s390/cio: fix race condition during online processing [ Upstream commit 2d8527f2f911fab84aec04df4788c0c23af3df48 ] A race condition exists in ccw_device_set_online() that can cause the online process to fail, leaving the affected device in an inconsistent state. As a result, subsequent attempts to set that device online fail with return code ENODEV. The problem occurs when a path verification request arrives after a wait for final device state completed, but before the result state is evaluated. Fix this by ensuring that the CCW-device lock is held between determining final state and checking result state. Note that since: commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") path verification requests are much more likely to occur during boot, resulting in an increased chance of this race condition occurring. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Reviewed-by: Alexandra Winter Reviewed-by: Vineeth Vijayan Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- drivers/s390/cio/device.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 02813b63f90fd..5666b9cc5d296 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -360,10 +360,8 @@ int ccw_device_set_online(struct ccw_device *cdev) spin_lock_irq(cdev->ccwlock); ret = ccw_device_online(cdev); - spin_unlock_irq(cdev->ccwlock); - if (ret == 0) - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); - else { + if (ret) { + spin_unlock_irq(cdev->ccwlock); CIO_MSG_EVENT(0, "ccw_device_online returned %d, " "device 0.%x.%04x\n", ret, cdev->private->dev_id.ssid, @@ -372,7 +370,12 @@ int ccw_device_set_online(struct ccw_device *cdev) put_device(&cdev->dev); return ret; } - spin_lock_irq(cdev->ccwlock); + /* Wait until a final state is reached */ + while (!dev_fsm_final_state(cdev)) { + spin_unlock_irq(cdev->ccwlock); + wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); + spin_lock_irq(cdev->ccwlock); + } /* Check if online processing was successful */ if ((cdev->private->state != DEV_STATE_ONLINE) && (cdev->private->state != DEV_STATE_W4SENSE)) { -- GitLab From 5fd4b090304e450aa0e7cc9cc2b4873285c6face Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 11 Apr 2024 14:08:52 +0300 Subject: [PATCH 1801/2290] drm: nv04: Fix out of bounds access [ Upstream commit cf92bb778eda7830e79452c6917efa8474a30c1e ] When Output Resource (dcb->or) value is assigned in fabricate_dcb_output(), there may be out of bounds access to dac_users array in case dcb->or is zero because ffs(dcb->or) is used as index there. The 'or' argument of fabricate_dcb_output() must be interpreted as a number of bit to set, not value. Utilize macros from 'enum nouveau_or' in calls instead of hardcoding. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 2e5702aff395 ("drm/nouveau: fabricate DCB encoder table for iMac G4") Fixes: 670820c0e6a9 ("drm/nouveau: Workaround incorrect DCB entry on a GeForce3 Ti 200.") Signed-off-by: Mikhail Kobuk Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240411110854.16701-1-m.kobuk@ispras.ru Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_bios.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 189903b65edc9..48cf593383b34 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -23,6 +23,7 @@ */ #include "nouveau_drv.h" +#include "nouveau_bios.h" #include "nouveau_reg.h" #include "dispnv04/hw.h" #include "nouveau_encoder.h" @@ -1675,7 +1676,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) */ if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) { if (*conn == 0xf2005014 && *conf == 0xffffffff) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B); return false; } } @@ -1761,26 +1762,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios) #ifdef __powerpc__ /* Apple iMac G4 NV17 */ if (of_machine_is_compatible("PowerMac4,5")) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1); - fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B); + fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C); return; } #endif /* Make up some sane defaults */ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, - bios->legacy.i2c_indices.crt, 1, 1); + bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B); if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0) fabricate_dcb_output(dcb, DCB_OUTPUT_TV, bios->legacy.i2c_indices.tv, - all_heads, 0); + all_heads, DCB_OUTPUT_A); else if (bios->tmds.output0_script_ptr || bios->tmds.output1_script_ptr) fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, bios->legacy.i2c_indices.panel, - all_heads, 1); + all_heads, DCB_OUTPUT_B); } static int -- GitLab From c330a13ab77c67ab09b2332a5e5fe33cded96122 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 4 Apr 2024 13:07:59 +0300 Subject: [PATCH 1802/2290] drm/panel: visionox-rm69299: don't unregister DSI device [ Upstream commit 9e4d3f4f34455abbaa9930bf6b7575a5cd081496 ] The DSI device for the panel was registered by the DSI host, so it is an error to unregister it from the panel driver. Drop the call to mipi_dsi_device_unregister(). Fixes: c7f66d32dd43 ("drm/panel: add support for rm69299 visionox panel") Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240404-drop-panel-unregister-v1-1-9f56953c5fb9@linaro.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-visionox-rm69299.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index ec228c269146f..b380bbb0e0d0a 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -261,8 +261,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi) struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); } -- GitLab From 3c55d4396b1577e074d55ba19e9ca69730de72d5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 2 Nov 2022 16:20:02 -0700 Subject: [PATCH 1803/2290] ARM: omap2: n8x0: stop instantiating codec platform data [ Upstream commit faf3b5cb59f84e4056bd84f115a958bc99c61e65 ] As of 0426370b58b2 ("ARM: dts: omap2420-n810: Correct the audio codec (tlv320aic33) node") the DTS properly specifies reset GPIO, and the device name in auxdata lookup table does not even match the one in device tree anymore, so stop instantiating it. Signed-off-by: Dmitry Torokhov Acked-by: Tony Lindgren Link: https://lore.kernel.org/r/20221102232004.1721864-1-dmitry.torokhov@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/board-n8x0.c | 5 ----- arch/arm/mach-omap2/common-board-devices.h | 2 -- arch/arm/mach-omap2/pdata-quirks.c | 1 - 3 files changed, 8 deletions(-) diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 5e86145db0e2a..8897364e550ba 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -567,10 +566,6 @@ struct menelaus_platform_data n8x0_menelaus_platform_data = { .late_init = n8x0_menelaus_late_init, }; -struct aic3x_pdata n810_aic33_data = { - .gpio_reset = 118, -}; - static int __init n8x0_late_initcall(void) { if (!board_caps) diff --git a/arch/arm/mach-omap2/common-board-devices.h b/arch/arm/mach-omap2/common-board-devices.h index b23962c38fb27..69694af714751 100644 --- a/arch/arm/mach-omap2/common-board-devices.h +++ b/arch/arm/mach-omap2/common-board-devices.h @@ -2,12 +2,10 @@ #ifndef __OMAP_COMMON_BOARD_DEVICES__ #define __OMAP_COMMON_BOARD_DEVICES__ -#include #include void *n8x0_legacy_init(void); extern struct menelaus_platform_data n8x0_menelaus_platform_data; -extern struct aic3x_pdata n810_aic33_data; #endif /* __OMAP_COMMON_BOARD_DEVICES__ */ diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 5b99d602c87bc..9deba798cc919 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -440,7 +440,6 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { #ifdef CONFIG_MACH_NOKIA_N8X0 OF_DEV_AUXDATA("ti,omap2420-mmc", 0x4809c000, "mmci-omap.0", NULL), OF_DEV_AUXDATA("menelaus", 0x72, "1-0072", &n8x0_menelaus_platform_data), - OF_DEV_AUXDATA("tlv320aic3x", 0x18, "2-0018", &n810_aic33_data), #endif #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu", -- GitLab From 57aadcc0288c2db8677ecb784c97b2548a78758b Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 10 Jan 2023 18:56:37 +0200 Subject: [PATCH 1804/2290] PCI: Avoid FLR for SolidRun SNET DPU rev 1 [ Upstream commit d089d69cc1f824936eeaa4fa172f8fa1a0949eaa ] This patch fixes a FLR bug on the SNET DPU rev 1 by setting the PCI_DEV_FLAGS_NO_FLR_RESET flag. As there is a quirk to avoid FLR (quirk_no_flr), I added a new quirk to check the rev ID before calling to quirk_no_flr. Without this patch, a SNET DPU rev 1 may hang when FLR is applied. Signed-off-by: Alvaro Karsz Acked-by: Bjorn Helgaas Message-Id: <20230110165638.123745-3-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 289ba6902e41b..d8d3f817e95cb 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5403,6 +5403,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr); +/* FLR may cause the SolidRun SNET DPU (rev 0x1) to hang */ +static void quirk_no_flr_snet(struct pci_dev *dev) +{ + if (dev->revision == 0x1) + quirk_no_flr(dev); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLIDRUN, 0x1000, quirk_no_flr_snet); + static void quirk_no_ext_tags(struct pci_dev *pdev) { struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); -- GitLab From c96b07dca169ac58fb6afe16a01e68ff0db3c528 Mon Sep 17 00:00:00 2001 From: David Yang Date: Tue, 7 Feb 2023 12:33:16 +0800 Subject: [PATCH 1805/2290] HID: kye: Sort kye devices [ Upstream commit 8c7b79bc04abb67e7f5864e94286a800b42aa96c ] Sort kye devices by their Produce IDs. Signed-off-by: David Yang Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 2 +- drivers/hid/hid-kye.c | 62 ++++++++++++++++++++-------------------- drivers/hid/hid-quirks.c | 6 ++-- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1be454bafcb91..405d88b08908d 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -717,10 +717,10 @@ #define USB_DEVICE_ID_KYE_GPEN_560 0x5003 #define USB_DEVICE_ID_KYE_EASYPEN_I405X 0x5010 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X 0x5011 -#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2 0x501a #define USB_DEVICE_ID_KYE_EASYPEN_M610X 0x5013 #define USB_DEVICE_ID_KYE_PENSKETCH_M912 0x5015 #define USB_DEVICE_ID_KYE_EASYPEN_M406XE 0x5019 +#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2 0x501A #define USB_VENDOR_ID_LABTEC 0x1020 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index da903138eee49..dc57e9d4a3e20 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -602,6 +602,18 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[74] = 0x08; } break; + case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE: + rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, + "Genius Gila Gaming Mouse"); + break; + case USB_DEVICE_ID_GENIUS_MANTICORE: + rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, + "Genius Manticore Keyboard"); + break; + case USB_DEVICE_ID_GENIUS_GX_IMPERATOR: + rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 83, + "Genius Gx Imperator Keyboard"); + break; case USB_DEVICE_ID_KYE_EASYPEN_I405X: if (*rsize == EASYPEN_I405X_RDESC_ORIG_SIZE) { rdesc = easypen_i405x_rdesc_fixed; @@ -638,18 +650,6 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, *rsize = sizeof(pensketch_m912_rdesc_fixed); } break; - case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE: - rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, - "Genius Gila Gaming Mouse"); - break; - case USB_DEVICE_ID_GENIUS_GX_IMPERATOR: - rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 83, - "Genius Gx Imperator Keyboard"); - break; - case USB_DEVICE_ID_GENIUS_MANTICORE: - rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, - "Genius Manticore Keyboard"); - break; } return rdesc; } @@ -717,26 +717,26 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) } switch (id->product) { + case USB_DEVICE_ID_GENIUS_MANTICORE: + /* + * The manticore keyboard needs to have all the interfaces + * opened at least once to be fully functional. + */ + if (hid_hw_open(hdev)) + hid_hw_close(hdev); + break; case USB_DEVICE_ID_KYE_EASYPEN_I405X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: - case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2: case USB_DEVICE_ID_KYE_EASYPEN_M610X: - case USB_DEVICE_ID_KYE_EASYPEN_M406XE: case USB_DEVICE_ID_KYE_PENSKETCH_M912: + case USB_DEVICE_ID_KYE_EASYPEN_M406XE: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2: ret = kye_tablet_enable(hdev); if (ret) { hid_err(hdev, "tablet enabling failed\n"); goto enabling_err; } break; - case USB_DEVICE_ID_GENIUS_MANTICORE: - /* - * The manticore keyboard needs to have all the interfaces - * opened at least once to be fully functional. - */ - if (hid_hw_open(hdev)) - hid_hw_close(hdev); - break; } return 0; @@ -749,23 +749,23 @@ err: static const struct hid_device_id kye_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_EASYPEN_I405X) }, + USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, + USB_DEVICE_ID_GENIUS_MANTICORE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) }, + USB_DEVICE_ID_GENIUS_GX_IMPERATOR) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_EASYPEN_M610X) }, + USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_EASYPEN_M406XE) }, + USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, + USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_GENIUS_GX_IMPERATOR) }, + USB_DEVICE_ID_KYE_PENSKETCH_M912) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_GENIUS_MANTICORE) }, + USB_DEVICE_ID_KYE_EASYPEN_M406XE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_PENSKETCH_M912) }, + USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) }, { } }; MODULE_DEVICE_TABLE(hid, kye_devices); diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 60884066362a1..debc49272a5c0 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -107,12 +107,12 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_1f4a), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_INNOMEDIA, USB_DEVICE_ID_INNEX_GENESIS_ATARI), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, -- GitLab From 0fe6a97a5fea6363442bb83500fb473daedadcf8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 23 Mar 2023 20:40:22 +0100 Subject: [PATCH 1806/2290] usb: pci-quirks: Reduce the length of a spinlock section in usb_amd_find_chipset_info() [ Upstream commit c03ff66dc0e0cbad9ed0c29500843e1da8533118 ] 'info' is local to the function. There is no need to zeroing it within a spin_lock section. Moreover, there is no need to explicitly initialize the .need_pll_quirk field. Initialize the structure when defined and remove the now useless memset(). Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/08ee42fced6af6bd56892cd14f2464380ab071fa.1679600396.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/pci-quirks.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index ef08d68b97149..2665832f9addf 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -207,8 +207,7 @@ EXPORT_SYMBOL_GPL(sb800_prefetch); static void usb_amd_find_chipset_info(void) { unsigned long flags; - struct amd_chipset_info info; - info.need_pll_quirk = false; + struct amd_chipset_info info = { }; spin_lock_irqsave(&amd_lock, flags); @@ -218,7 +217,6 @@ static void usb_amd_find_chipset_info(void) spin_unlock_irqrestore(&amd_lock, flags); return; } - memset(&info, 0, sizeof(info)); spin_unlock_irqrestore(&amd_lock, flags); if (!amd_chipset_sb_type_init(&info)) { -- GitLab From f4aae2afe2d5497820d0ced9e4cf8af30088d32c Mon Sep 17 00:00:00 2001 From: Mike Pastore Date: Sun, 7 May 2023 02:35:19 -0500 Subject: [PATCH 1807/2290] PCI: Delay after FLR of Solidigm P44 Pro NVMe [ Upstream commit 0ac448e0d29d6ba978684b3fa2e3ac7294ec2475 ] Prevent KVM hang when a Solidgm P44 Pro NVMe is passed through to a guest via IOMMU and the guest is subsequently rebooted. A similar issue was identified and patched by 51ba09452d11 ("PCI: Delay after FLR of Intel DC P3700 NVMe") and the same fix can be applied for this case. (Intel spun off their NAND and SSD business as Solidigm and sold it to SK Hynix in late 2021.) Link: https://lore.kernel.org/r/20230507073519.9737-1-mike@oobak.org Signed-off-by: Mike Pastore Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 10 ++++++---- include/linux/pci_ids.h | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d8d3f817e95cb..92169dc71468e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4011,10 +4011,11 @@ static int nvme_disable_and_flr(struct pci_dev *dev, bool probe) } /* - * Intel DC P3700 NVMe controller will timeout waiting for ready status - * to change after NVMe enable if the driver starts interacting with the - * device too soon after FLR. A 250ms delay after FLR has heuristically - * proven to produce reliably working results for device assignment cases. + * Some NVMe controllers such as Intel DC P3700 and Solidigm P44 Pro will + * timeout waiting for ready status to change after NVMe enable if the driver + * starts interacting with the device too soon after FLR. A 250ms delay after + * FLR has heuristically proven to produce reliably working results for device + * assignment cases. */ static int delay_250ms_after_flr(struct pci_dev *dev, bool probe) { @@ -4101,6 +4102,7 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr }, { PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr }, { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr }, + { PCI_VENDOR_ID_SOLIDIGM, 0xf1ac, delay_250ms_after_flr }, { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, reset_chelsio_generic_dev }, { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 73cc1e7dd15ad..9e9794d03c9fc 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -158,6 +158,8 @@ #define PCI_VENDOR_ID_LOONGSON 0x0014 +#define PCI_VENDOR_ID_SOLIDIGM 0x025e + #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a -- GitLab From 4b7ed2400e0d895a9920c80d63646728bb389baa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 21:35:38 +0200 Subject: [PATCH 1808/2290] x86/quirks: Include linux/pnp.h for arch_pnpbios_disabled() [ Upstream commit 056b44a4d10907ec8153863b2a0564e808ef1440 ] arch_pnpbios_disabled() is defined in architecture code on x86, but this does not include the appropriate header, causing a warning: arch/x86/kernel/platform-quirks.c:42:13: error: no previous prototype for 'arch_pnpbios_disabled' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann Signed-off-by: Dave Hansen Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/all/20230516193549.544673-10-arnd%40kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/platform-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b348a672f71d5..b525fe6d66571 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include -- GitLab From 932a7651002be85bada95f0277b9ec5d6aa6d687 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 3 Feb 2023 15:57:59 +0200 Subject: [PATCH 1809/2290] thunderbolt: Log function name of the called quirk [ Upstream commit f14d177e0be652ef7b265753f08f2a7d31935668 ] This is useful when debugging whether a quirk has been matched or not. Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 638cb5fb22c11..13719a851c719 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -130,6 +130,7 @@ void tb_check_quirks(struct tb_switch *sw) if (q->device && q->device != sw->device) continue; + tb_sw_dbg(sw, "running %ps\n", q->hook); q->hook(sw); } } -- GitLab From caa7ff1d7d514a668fb6e52ef50cbee17a6f4fc1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 27 Feb 2023 12:45:09 +0200 Subject: [PATCH 1810/2290] thunderbolt: Add debug log for link controller power quirk [ Upstream commit ccdb0900a0c3b0b56af5f547cceb64ee8d09483f ] Add a debug log to this quirk as well so we can see what quirks have been applied when debugging. Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 13719a851c719..e81de9c30eac9 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -10,6 +10,7 @@ static void quirk_force_power_link(struct tb_switch *sw) { sw->quirks |= QUIRK_FORCE_POWER_LINK_CONTROLLER; + tb_sw_dbg(sw, "forcing power to link controller\n"); } static void quirk_dp_credit_allocation(struct tb_switch *sw) -- GitLab From 89a9196aec6b666bd69d571a82569b24baf9d66c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 11 Jun 2023 18:19:23 +0100 Subject: [PATCH 1811/2290] PCI: Execute quirk_enable_clear_retrain_link() earlier [ Upstream commit 07a8d698de50c4740ac6f709c43e23a6da6e4dbc ] Make quirk_enable_clear_retrain_link() an early quirk so that any later fixups can rely on dev->clear_retrain_link to have been already initialised. [bhelgaas: reorder to just before it becomes possible to call pcie_retrain_link() earlier] Link: https://lore.kernel.org/r/alpine.DEB.2.21.2305310049000.59226@angie.orcam.me.uk Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 92169dc71468e..3959ea7b106b6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2425,9 +2425,9 @@ static void quirk_enable_clear_retrain_link(struct pci_dev *dev) dev->clear_retrain_link = 1; pci_info(dev, "Enable PCIe Retrain Link quirk\n"); } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link); static void fixup_rev1_53c810(struct pci_dev *dev) { -- GitLab From 87709f7ecdb884ab75e3de6f951dffe500ee95b2 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Fri, 23 Jun 2023 17:00:02 -0700 Subject: [PATCH 1812/2290] PCI: switchtec: Use normal comment style [ Upstream commit 846691f5483d61259db2f4d6a3dce8b98d518794 ] Use normal comment style '/* */' for device ID description. Link: https://lore.kernel.org/r/20230624000003.2315364-2-kelvin.cao@microchip.com Signed-off-by: Kelvin Cao Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Stable-dep-of: 0fb53e64705a ("PCI: switchtec: Add support for PCIe Gen5 devices") Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 114 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index d05a482639e3c..f0322e9dbee93 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1740,63 +1740,63 @@ static void switchtec_pci_remove(struct pci_dev *pdev) } static const struct pci_device_id switchtec_pci_tbl[] = { - SWITCHTEC_PCI_DEVICE(0x8531, SWITCHTEC_GEN3), //PFX 24xG3 - SWITCHTEC_PCI_DEVICE(0x8532, SWITCHTEC_GEN3), //PFX 32xG3 - SWITCHTEC_PCI_DEVICE(0x8533, SWITCHTEC_GEN3), //PFX 48xG3 - SWITCHTEC_PCI_DEVICE(0x8534, SWITCHTEC_GEN3), //PFX 64xG3 - SWITCHTEC_PCI_DEVICE(0x8535, SWITCHTEC_GEN3), //PFX 80xG3 - SWITCHTEC_PCI_DEVICE(0x8536, SWITCHTEC_GEN3), //PFX 96xG3 - SWITCHTEC_PCI_DEVICE(0x8541, SWITCHTEC_GEN3), //PSX 24xG3 - SWITCHTEC_PCI_DEVICE(0x8542, SWITCHTEC_GEN3), //PSX 32xG3 - SWITCHTEC_PCI_DEVICE(0x8543, SWITCHTEC_GEN3), //PSX 48xG3 - SWITCHTEC_PCI_DEVICE(0x8544, SWITCHTEC_GEN3), //PSX 64xG3 - SWITCHTEC_PCI_DEVICE(0x8545, SWITCHTEC_GEN3), //PSX 80xG3 - SWITCHTEC_PCI_DEVICE(0x8546, SWITCHTEC_GEN3), //PSX 96xG3 - SWITCHTEC_PCI_DEVICE(0x8551, SWITCHTEC_GEN3), //PAX 24XG3 - SWITCHTEC_PCI_DEVICE(0x8552, SWITCHTEC_GEN3), //PAX 32XG3 - SWITCHTEC_PCI_DEVICE(0x8553, SWITCHTEC_GEN3), //PAX 48XG3 - SWITCHTEC_PCI_DEVICE(0x8554, SWITCHTEC_GEN3), //PAX 64XG3 - SWITCHTEC_PCI_DEVICE(0x8555, SWITCHTEC_GEN3), //PAX 80XG3 - SWITCHTEC_PCI_DEVICE(0x8556, SWITCHTEC_GEN3), //PAX 96XG3 - SWITCHTEC_PCI_DEVICE(0x8561, SWITCHTEC_GEN3), //PFXL 24XG3 - SWITCHTEC_PCI_DEVICE(0x8562, SWITCHTEC_GEN3), //PFXL 32XG3 - SWITCHTEC_PCI_DEVICE(0x8563, SWITCHTEC_GEN3), //PFXL 48XG3 - SWITCHTEC_PCI_DEVICE(0x8564, SWITCHTEC_GEN3), //PFXL 64XG3 - SWITCHTEC_PCI_DEVICE(0x8565, SWITCHTEC_GEN3), //PFXL 80XG3 - SWITCHTEC_PCI_DEVICE(0x8566, SWITCHTEC_GEN3), //PFXL 96XG3 - SWITCHTEC_PCI_DEVICE(0x8571, SWITCHTEC_GEN3), //PFXI 24XG3 - SWITCHTEC_PCI_DEVICE(0x8572, SWITCHTEC_GEN3), //PFXI 32XG3 - SWITCHTEC_PCI_DEVICE(0x8573, SWITCHTEC_GEN3), //PFXI 48XG3 - SWITCHTEC_PCI_DEVICE(0x8574, SWITCHTEC_GEN3), //PFXI 64XG3 - SWITCHTEC_PCI_DEVICE(0x8575, SWITCHTEC_GEN3), //PFXI 80XG3 - SWITCHTEC_PCI_DEVICE(0x8576, SWITCHTEC_GEN3), //PFXI 96XG3 - SWITCHTEC_PCI_DEVICE(0x4000, SWITCHTEC_GEN4), //PFX 100XG4 - SWITCHTEC_PCI_DEVICE(0x4084, SWITCHTEC_GEN4), //PFX 84XG4 - SWITCHTEC_PCI_DEVICE(0x4068, SWITCHTEC_GEN4), //PFX 68XG4 - SWITCHTEC_PCI_DEVICE(0x4052, SWITCHTEC_GEN4), //PFX 52XG4 - SWITCHTEC_PCI_DEVICE(0x4036, SWITCHTEC_GEN4), //PFX 36XG4 - SWITCHTEC_PCI_DEVICE(0x4028, SWITCHTEC_GEN4), //PFX 28XG4 - SWITCHTEC_PCI_DEVICE(0x4100, SWITCHTEC_GEN4), //PSX 100XG4 - SWITCHTEC_PCI_DEVICE(0x4184, SWITCHTEC_GEN4), //PSX 84XG4 - SWITCHTEC_PCI_DEVICE(0x4168, SWITCHTEC_GEN4), //PSX 68XG4 - SWITCHTEC_PCI_DEVICE(0x4152, SWITCHTEC_GEN4), //PSX 52XG4 - SWITCHTEC_PCI_DEVICE(0x4136, SWITCHTEC_GEN4), //PSX 36XG4 - SWITCHTEC_PCI_DEVICE(0x4128, SWITCHTEC_GEN4), //PSX 28XG4 - SWITCHTEC_PCI_DEVICE(0x4200, SWITCHTEC_GEN4), //PAX 100XG4 - SWITCHTEC_PCI_DEVICE(0x4284, SWITCHTEC_GEN4), //PAX 84XG4 - SWITCHTEC_PCI_DEVICE(0x4268, SWITCHTEC_GEN4), //PAX 68XG4 - SWITCHTEC_PCI_DEVICE(0x4252, SWITCHTEC_GEN4), //PAX 52XG4 - SWITCHTEC_PCI_DEVICE(0x4236, SWITCHTEC_GEN4), //PAX 36XG4 - SWITCHTEC_PCI_DEVICE(0x4228, SWITCHTEC_GEN4), //PAX 28XG4 - SWITCHTEC_PCI_DEVICE(0x4352, SWITCHTEC_GEN4), //PFXA 52XG4 - SWITCHTEC_PCI_DEVICE(0x4336, SWITCHTEC_GEN4), //PFXA 36XG4 - SWITCHTEC_PCI_DEVICE(0x4328, SWITCHTEC_GEN4), //PFXA 28XG4 - SWITCHTEC_PCI_DEVICE(0x4452, SWITCHTEC_GEN4), //PSXA 52XG4 - SWITCHTEC_PCI_DEVICE(0x4436, SWITCHTEC_GEN4), //PSXA 36XG4 - SWITCHTEC_PCI_DEVICE(0x4428, SWITCHTEC_GEN4), //PSXA 28XG4 - SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), //PAXA 52XG4 - SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), //PAXA 36XG4 - SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), //PAXA 28XG4 + SWITCHTEC_PCI_DEVICE(0x8531, SWITCHTEC_GEN3), /* PFX 24xG3 */ + SWITCHTEC_PCI_DEVICE(0x8532, SWITCHTEC_GEN3), /* PFX 32xG3 */ + SWITCHTEC_PCI_DEVICE(0x8533, SWITCHTEC_GEN3), /* PFX 48xG3 */ + SWITCHTEC_PCI_DEVICE(0x8534, SWITCHTEC_GEN3), /* PFX 64xG3 */ + SWITCHTEC_PCI_DEVICE(0x8535, SWITCHTEC_GEN3), /* PFX 80xG3 */ + SWITCHTEC_PCI_DEVICE(0x8536, SWITCHTEC_GEN3), /* PFX 96xG3 */ + SWITCHTEC_PCI_DEVICE(0x8541, SWITCHTEC_GEN3), /* PSX 24xG3 */ + SWITCHTEC_PCI_DEVICE(0x8542, SWITCHTEC_GEN3), /* PSX 32xG3 */ + SWITCHTEC_PCI_DEVICE(0x8543, SWITCHTEC_GEN3), /* PSX 48xG3 */ + SWITCHTEC_PCI_DEVICE(0x8544, SWITCHTEC_GEN3), /* PSX 64xG3 */ + SWITCHTEC_PCI_DEVICE(0x8545, SWITCHTEC_GEN3), /* PSX 80xG3 */ + SWITCHTEC_PCI_DEVICE(0x8546, SWITCHTEC_GEN3), /* PSX 96xG3 */ + SWITCHTEC_PCI_DEVICE(0x8551, SWITCHTEC_GEN3), /* PAX 24XG3 */ + SWITCHTEC_PCI_DEVICE(0x8552, SWITCHTEC_GEN3), /* PAX 32XG3 */ + SWITCHTEC_PCI_DEVICE(0x8553, SWITCHTEC_GEN3), /* PAX 48XG3 */ + SWITCHTEC_PCI_DEVICE(0x8554, SWITCHTEC_GEN3), /* PAX 64XG3 */ + SWITCHTEC_PCI_DEVICE(0x8555, SWITCHTEC_GEN3), /* PAX 80XG3 */ + SWITCHTEC_PCI_DEVICE(0x8556, SWITCHTEC_GEN3), /* PAX 96XG3 */ + SWITCHTEC_PCI_DEVICE(0x8561, SWITCHTEC_GEN3), /* PFXL 24XG3 */ + SWITCHTEC_PCI_DEVICE(0x8562, SWITCHTEC_GEN3), /* PFXL 32XG3 */ + SWITCHTEC_PCI_DEVICE(0x8563, SWITCHTEC_GEN3), /* PFXL 48XG3 */ + SWITCHTEC_PCI_DEVICE(0x8564, SWITCHTEC_GEN3), /* PFXL 64XG3 */ + SWITCHTEC_PCI_DEVICE(0x8565, SWITCHTEC_GEN3), /* PFXL 80XG3 */ + SWITCHTEC_PCI_DEVICE(0x8566, SWITCHTEC_GEN3), /* PFXL 96XG3 */ + SWITCHTEC_PCI_DEVICE(0x8571, SWITCHTEC_GEN3), /* PFXI 24XG3 */ + SWITCHTEC_PCI_DEVICE(0x8572, SWITCHTEC_GEN3), /* PFXI 32XG3 */ + SWITCHTEC_PCI_DEVICE(0x8573, SWITCHTEC_GEN3), /* PFXI 48XG3 */ + SWITCHTEC_PCI_DEVICE(0x8574, SWITCHTEC_GEN3), /* PFXI 64XG3 */ + SWITCHTEC_PCI_DEVICE(0x8575, SWITCHTEC_GEN3), /* PFXI 80XG3 */ + SWITCHTEC_PCI_DEVICE(0x8576, SWITCHTEC_GEN3), /* PFXI 96XG3 */ + SWITCHTEC_PCI_DEVICE(0x4000, SWITCHTEC_GEN4), /* PFX 100XG4 */ + SWITCHTEC_PCI_DEVICE(0x4084, SWITCHTEC_GEN4), /* PFX 84XG4 */ + SWITCHTEC_PCI_DEVICE(0x4068, SWITCHTEC_GEN4), /* PFX 68XG4 */ + SWITCHTEC_PCI_DEVICE(0x4052, SWITCHTEC_GEN4), /* PFX 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4036, SWITCHTEC_GEN4), /* PFX 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4028, SWITCHTEC_GEN4), /* PFX 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4100, SWITCHTEC_GEN4), /* PSX 100XG4 */ + SWITCHTEC_PCI_DEVICE(0x4184, SWITCHTEC_GEN4), /* PSX 84XG4 */ + SWITCHTEC_PCI_DEVICE(0x4168, SWITCHTEC_GEN4), /* PSX 68XG4 */ + SWITCHTEC_PCI_DEVICE(0x4152, SWITCHTEC_GEN4), /* PSX 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4136, SWITCHTEC_GEN4), /* PSX 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4128, SWITCHTEC_GEN4), /* PSX 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4200, SWITCHTEC_GEN4), /* PAX 100XG4 */ + SWITCHTEC_PCI_DEVICE(0x4284, SWITCHTEC_GEN4), /* PAX 84XG4 */ + SWITCHTEC_PCI_DEVICE(0x4268, SWITCHTEC_GEN4), /* PAX 68XG4 */ + SWITCHTEC_PCI_DEVICE(0x4252, SWITCHTEC_GEN4), /* PAX 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4236, SWITCHTEC_GEN4), /* PAX 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4228, SWITCHTEC_GEN4), /* PAX 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4352, SWITCHTEC_GEN4), /* PFXA 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4336, SWITCHTEC_GEN4), /* PFXA 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4328, SWITCHTEC_GEN4), /* PFXA 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4452, SWITCHTEC_GEN4), /* PSXA 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4436, SWITCHTEC_GEN4), /* PSXA 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4428, SWITCHTEC_GEN4), /* PSXA 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), /* PAXA 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), /* PAXA 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), /* PAXA 28XG4 */ {0} }; MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl); -- GitLab From bbdfa144820566e142b90018cab7b8214c54e418 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Fri, 23 Jun 2023 17:00:03 -0700 Subject: [PATCH 1813/2290] PCI: switchtec: Add support for PCIe Gen5 devices [ Upstream commit 0fb53e64705ae0fabd9593102e0f0e6812968802 ] Advertise support of Gen5 devices in the driver's device ID table and add the same IDs for the switchtec quirks. Also update driver code to accommodate them. Link: https://lore.kernel.org/r/20230624000003.2315364-3-kelvin.cao@microchip.com Signed-off-by: Kelvin Cao Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 36 ++++++++++++++++++++++++++++ drivers/pci/switch/switchtec.c | 44 ++++++++++++++++++++++++++++++---- include/linux/switchtec.h | 1 + 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 3959ea7b106b6..47099d00fcf1d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5818,6 +5818,42 @@ SWITCHTEC_QUIRK(0x4428); /* PSXA 28XG4 */ SWITCHTEC_QUIRK(0x4552); /* PAXA 52XG4 */ SWITCHTEC_QUIRK(0x4536); /* PAXA 36XG4 */ SWITCHTEC_QUIRK(0x4528); /* PAXA 28XG4 */ +SWITCHTEC_QUIRK(0x5000); /* PFX 100XG5 */ +SWITCHTEC_QUIRK(0x5084); /* PFX 84XG5 */ +SWITCHTEC_QUIRK(0x5068); /* PFX 68XG5 */ +SWITCHTEC_QUIRK(0x5052); /* PFX 52XG5 */ +SWITCHTEC_QUIRK(0x5036); /* PFX 36XG5 */ +SWITCHTEC_QUIRK(0x5028); /* PFX 28XG5 */ +SWITCHTEC_QUIRK(0x5100); /* PSX 100XG5 */ +SWITCHTEC_QUIRK(0x5184); /* PSX 84XG5 */ +SWITCHTEC_QUIRK(0x5168); /* PSX 68XG5 */ +SWITCHTEC_QUIRK(0x5152); /* PSX 52XG5 */ +SWITCHTEC_QUIRK(0x5136); /* PSX 36XG5 */ +SWITCHTEC_QUIRK(0x5128); /* PSX 28XG5 */ +SWITCHTEC_QUIRK(0x5200); /* PAX 100XG5 */ +SWITCHTEC_QUIRK(0x5284); /* PAX 84XG5 */ +SWITCHTEC_QUIRK(0x5268); /* PAX 68XG5 */ +SWITCHTEC_QUIRK(0x5252); /* PAX 52XG5 */ +SWITCHTEC_QUIRK(0x5236); /* PAX 36XG5 */ +SWITCHTEC_QUIRK(0x5228); /* PAX 28XG5 */ +SWITCHTEC_QUIRK(0x5300); /* PFXA 100XG5 */ +SWITCHTEC_QUIRK(0x5384); /* PFXA 84XG5 */ +SWITCHTEC_QUIRK(0x5368); /* PFXA 68XG5 */ +SWITCHTEC_QUIRK(0x5352); /* PFXA 52XG5 */ +SWITCHTEC_QUIRK(0x5336); /* PFXA 36XG5 */ +SWITCHTEC_QUIRK(0x5328); /* PFXA 28XG5 */ +SWITCHTEC_QUIRK(0x5400); /* PSXA 100XG5 */ +SWITCHTEC_QUIRK(0x5484); /* PSXA 84XG5 */ +SWITCHTEC_QUIRK(0x5468); /* PSXA 68XG5 */ +SWITCHTEC_QUIRK(0x5452); /* PSXA 52XG5 */ +SWITCHTEC_QUIRK(0x5436); /* PSXA 36XG5 */ +SWITCHTEC_QUIRK(0x5428); /* PSXA 28XG5 */ +SWITCHTEC_QUIRK(0x5500); /* PAXA 100XG5 */ +SWITCHTEC_QUIRK(0x5584); /* PAXA 84XG5 */ +SWITCHTEC_QUIRK(0x5568); /* PAXA 68XG5 */ +SWITCHTEC_QUIRK(0x5552); /* PAXA 52XG5 */ +SWITCHTEC_QUIRK(0x5536); /* PAXA 36XG5 */ +SWITCHTEC_QUIRK(0x5528); /* PAXA 28XG5 */ /* * The PLX NTB uses devfn proxy IDs to move TLPs between NT endpoints. diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index f0322e9dbee93..332af6938d7fd 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -372,7 +372,7 @@ static ssize_t field ## _show(struct device *dev, \ if (stdev->gen == SWITCHTEC_GEN3) \ return io_string_show(buf, &si->gen3.field, \ sizeof(si->gen3.field)); \ - else if (stdev->gen == SWITCHTEC_GEN4) \ + else if (stdev->gen >= SWITCHTEC_GEN4) \ return io_string_show(buf, &si->gen4.field, \ sizeof(si->gen4.field)); \ else \ @@ -663,7 +663,7 @@ static int ioctl_flash_info(struct switchtec_dev *stdev, if (stdev->gen == SWITCHTEC_GEN3) { info.flash_length = ioread32(&fi->gen3.flash_length); info.num_partitions = SWITCHTEC_NUM_PARTITIONS_GEN3; - } else if (stdev->gen == SWITCHTEC_GEN4) { + } else if (stdev->gen >= SWITCHTEC_GEN4) { info.flash_length = ioread32(&fi->gen4.flash_length); info.num_partitions = SWITCHTEC_NUM_PARTITIONS_GEN4; } else { @@ -870,7 +870,7 @@ static int ioctl_flash_part_info(struct switchtec_dev *stdev, ret = flash_part_info_gen3(stdev, &info); if (ret) return ret; - } else if (stdev->gen == SWITCHTEC_GEN4) { + } else if (stdev->gen >= SWITCHTEC_GEN4) { ret = flash_part_info_gen4(stdev, &info); if (ret) return ret; @@ -1606,7 +1606,7 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, if (stdev->gen == SWITCHTEC_GEN3) part_id = &stdev->mmio_sys_info->gen3.partition_id; - else if (stdev->gen == SWITCHTEC_GEN4) + else if (stdev->gen >= SWITCHTEC_GEN4) part_id = &stdev->mmio_sys_info->gen4.partition_id; else return -EOPNOTSUPP; @@ -1797,6 +1797,42 @@ static const struct pci_device_id switchtec_pci_tbl[] = { SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), /* PAXA 52XG4 */ SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), /* PAXA 36XG4 */ SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), /* PAXA 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x5000, SWITCHTEC_GEN5), /* PFX 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5084, SWITCHTEC_GEN5), /* PFX 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5068, SWITCHTEC_GEN5), /* PFX 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5052, SWITCHTEC_GEN5), /* PFX 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5036, SWITCHTEC_GEN5), /* PFX 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5028, SWITCHTEC_GEN5), /* PFX 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5100, SWITCHTEC_GEN5), /* PSX 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5184, SWITCHTEC_GEN5), /* PSX 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5168, SWITCHTEC_GEN5), /* PSX 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5152, SWITCHTEC_GEN5), /* PSX 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5136, SWITCHTEC_GEN5), /* PSX 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5128, SWITCHTEC_GEN5), /* PSX 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5200, SWITCHTEC_GEN5), /* PAX 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5284, SWITCHTEC_GEN5), /* PAX 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5268, SWITCHTEC_GEN5), /* PAX 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5252, SWITCHTEC_GEN5), /* PAX 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5236, SWITCHTEC_GEN5), /* PAX 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5228, SWITCHTEC_GEN5), /* PAX 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5300, SWITCHTEC_GEN5), /* PFXA 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5384, SWITCHTEC_GEN5), /* PFXA 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5368, SWITCHTEC_GEN5), /* PFXA 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5352, SWITCHTEC_GEN5), /* PFXA 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5336, SWITCHTEC_GEN5), /* PFXA 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5328, SWITCHTEC_GEN5), /* PFXA 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5400, SWITCHTEC_GEN5), /* PSXA 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5484, SWITCHTEC_GEN5), /* PSXA 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5468, SWITCHTEC_GEN5), /* PSXA 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5452, SWITCHTEC_GEN5), /* PSXA 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5436, SWITCHTEC_GEN5), /* PSXA 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5428, SWITCHTEC_GEN5), /* PSXA 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5500, SWITCHTEC_GEN5), /* PAXA 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5584, SWITCHTEC_GEN5), /* PAXA 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5568, SWITCHTEC_GEN5), /* PAXA 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5552, SWITCHTEC_GEN5), /* PAXA 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5536, SWITCHTEC_GEN5), /* PAXA 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5528, SWITCHTEC_GEN5), /* PAXA 28XG5 */ {0} }; MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl); diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 48fabe36509ee..8d8fac1626bd9 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -41,6 +41,7 @@ enum { enum switchtec_gen { SWITCHTEC_GEN3, SWITCHTEC_GEN4, + SWITCHTEC_GEN5, }; struct mrpc_regs { -- GitLab From 4d74cb9cdfd51e6d854f8da54113e716335cb2ca Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:02:16 +0930 Subject: [PATCH 1814/2290] ALSA: scarlett2: Move USB IDs out from device_info struct [ Upstream commit d98cc489029dba4d99714c2e8ec4f5ba249f6851 ] By moving the USB IDs from the device_info struct into scarlett2_devices[], that will allow for devices with different USB IDs to share the same device_info. Tested-by: Philippe Perrot Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/8263368e8d49e6fcebc709817bd82ab79b404468.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Stable-dep-of: b9a98cdd3ac7 ("ALSA: scarlett2: Add support for Clarett 8Pre USB") Signed-off-by: Sasha Levin --- sound/usb/mixer_scarlett_gen2.c | 63 ++++++++++++--------------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 1bcb05c73e0ad..2668bc1b918ba 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -317,8 +317,6 @@ struct scarlett2_mux_entry { }; struct scarlett2_device_info { - u32 usb_id; /* USB device identifier */ - /* Gen 3 devices have an internal MSD mode switch that needs * to be disabled in order to access the full functionality of * the device. @@ -440,8 +438,6 @@ struct scarlett2_data { /*** Model-specific data ***/ static const struct scarlett2_device_info s6i6_gen2_info = { - .usb_id = USB_ID(0x1235, 0x8203), - .config_set = SCARLETT2_CONFIG_SET_GEN_2, .level_input_count = 2, .pad_input_count = 2, @@ -486,8 +482,6 @@ static const struct scarlett2_device_info s6i6_gen2_info = { }; static const struct scarlett2_device_info s18i8_gen2_info = { - .usb_id = USB_ID(0x1235, 0x8204), - .config_set = SCARLETT2_CONFIG_SET_GEN_2, .level_input_count = 2, .pad_input_count = 4, @@ -535,8 +529,6 @@ static const struct scarlett2_device_info s18i8_gen2_info = { }; static const struct scarlett2_device_info s18i20_gen2_info = { - .usb_id = USB_ID(0x1235, 0x8201), - .config_set = SCARLETT2_CONFIG_SET_GEN_2, .line_out_hw_vol = 1, @@ -589,8 +581,6 @@ static const struct scarlett2_device_info s18i20_gen2_info = { }; static const struct scarlett2_device_info solo_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8211), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_NO_MIXER, .level_input_count = 1, @@ -602,8 +592,6 @@ static const struct scarlett2_device_info solo_gen3_info = { }; static const struct scarlett2_device_info s2i2_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8210), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_NO_MIXER, .level_input_count = 2, @@ -614,8 +602,6 @@ static const struct scarlett2_device_info s2i2_gen3_info = { }; static const struct scarlett2_device_info s4i4_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8212), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .level_input_count = 2, @@ -660,8 +646,6 @@ static const struct scarlett2_device_info s4i4_gen3_info = { }; static const struct scarlett2_device_info s8i6_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8213), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .level_input_count = 2, @@ -713,8 +697,6 @@ static const struct scarlett2_device_info s8i6_gen3_info = { }; static const struct scarlett2_device_info s18i8_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8214), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .line_out_hw_vol = 1, @@ -783,8 +765,6 @@ static const struct scarlett2_device_info s18i8_gen3_info = { }; static const struct scarlett2_device_info s18i20_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8215), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .line_out_hw_vol = 1, @@ -848,8 +828,6 @@ static const struct scarlett2_device_info s18i20_gen3_info = { }; static const struct scarlett2_device_info clarett_8pre_info = { - .usb_id = USB_ID(0x1235, 0x820c), - .config_set = SCARLETT2_CONFIG_SET_CLARETT, .line_out_hw_vol = 1, .level_input_count = 2, @@ -902,25 +880,30 @@ static const struct scarlett2_device_info clarett_8pre_info = { } }, }; -static const struct scarlett2_device_info *scarlett2_devices[] = { +struct scarlett2_device_entry { + const u32 usb_id; /* USB device identifier */ + const struct scarlett2_device_info *info; +}; + +static const struct scarlett2_device_entry scarlett2_devices[] = { /* Supported Gen 2 devices */ - &s6i6_gen2_info, - &s18i8_gen2_info, - &s18i20_gen2_info, + { USB_ID(0x1235, 0x8203), &s6i6_gen2_info }, + { USB_ID(0x1235, 0x8204), &s18i8_gen2_info }, + { USB_ID(0x1235, 0x8201), &s18i20_gen2_info }, /* Supported Gen 3 devices */ - &solo_gen3_info, - &s2i2_gen3_info, - &s4i4_gen3_info, - &s8i6_gen3_info, - &s18i8_gen3_info, - &s18i20_gen3_info, + { USB_ID(0x1235, 0x8211), &solo_gen3_info }, + { USB_ID(0x1235, 0x8210), &s2i2_gen3_info }, + { USB_ID(0x1235, 0x8212), &s4i4_gen3_info }, + { USB_ID(0x1235, 0x8213), &s8i6_gen3_info }, + { USB_ID(0x1235, 0x8214), &s18i8_gen3_info }, + { USB_ID(0x1235, 0x8215), &s18i20_gen3_info }, /* Supported Clarett+ devices */ - &clarett_8pre_info, + { USB_ID(0x1235, 0x820c), &clarett_8pre_info }, /* End of list */ - NULL + { 0, NULL }, }; /* get the starting port index number for a given port type/direction */ @@ -4149,17 +4132,17 @@ static int scarlett2_init_notify(struct usb_mixer_interface *mixer) static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) { - const struct scarlett2_device_info **info = scarlett2_devices; + const struct scarlett2_device_entry *entry = scarlett2_devices; int err; - /* Find device in scarlett2_devices */ - while (*info && (*info)->usb_id != mixer->chip->usb_id) - info++; - if (!*info) + /* Find entry in scarlett2_devices */ + while (entry->usb_id && entry->usb_id != mixer->chip->usb_id) + entry++; + if (!entry->usb_id) return -EINVAL; /* Initialise private data */ - err = scarlett2_init_private(mixer, *info); + err = scarlett2_init_private(mixer, entry->info); if (err < 0) return err; -- GitLab From e2b8480b709d48ca1723eed7258f26a0df9a1965 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:02:37 +0930 Subject: [PATCH 1815/2290] ALSA: scarlett2: Add support for Clarett 8Pre USB [ Upstream commit b9a98cdd3ac7b80d8ea0f6acd81c88ad3d8bcb4a ] The Clarett 8Pre USB works the same as the Clarett+ 8Pre, only the USB ID is different. Tested-by: Philippe Perrot Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/e59f47b29e2037f031b56bde10474c6e96e31ba5.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 1 + sound/usb/mixer_scarlett_gen2.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 1f32e3ae3aa31..b122d7aedb443 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3447,6 +3447,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8213): /* Focusrite Scarlett 8i6 3rd Gen */ case USB_ID(0x1235, 0x8214): /* Focusrite Scarlett 18i8 3rd Gen */ case USB_ID(0x1235, 0x8215): /* Focusrite Scarlett 18i20 3rd Gen */ + case USB_ID(0x1235, 0x8208): /* Focusrite Clarett 8Pre USB */ case USB_ID(0x1235, 0x820c): /* Focusrite Clarett+ 8Pre */ err = snd_scarlett_gen2_init(mixer); break; diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 2668bc1b918ba..f949d22da382d 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1,13 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Focusrite Scarlett Gen 2/3 and Clarett+ Driver for ALSA + * Focusrite Scarlett Gen 2/3 and Clarett USB/Clarett+ Driver for ALSA * * Supported models: * - 6i6/18i8/18i20 Gen 2 * - Solo/2i2/4i4/8i6/18i8/18i20 Gen 3 + * - Clarett 8Pre USB * - Clarett+ 8Pre * - * Copyright (c) 2018-2022 by Geoffrey D. Bennett + * Copyright (c) 2018-2023 by Geoffrey D. Bennett * Copyright (c) 2020-2021 by Vladimir Sadovnikov * Copyright (c) 2022 by Christian Colglazier * @@ -56,6 +57,9 @@ * Support for Clarett+ 8Pre added in Aug 2022 by Christian * Colglazier. * + * Support for Clarett 8Pre USB added in Sep 2023 (thanks to Philippe + * Perrot for confirmation). + * * This ALSA mixer gives access to (model-dependent): * - input, output, mixer-matrix muxes * - mixer-matrix gain stages @@ -899,7 +903,8 @@ static const struct scarlett2_device_entry scarlett2_devices[] = { { USB_ID(0x1235, 0x8214), &s18i8_gen3_info }, { USB_ID(0x1235, 0x8215), &s18i20_gen3_info }, - /* Supported Clarett+ devices */ + /* Supported Clarett USB/Clarett+ devices */ + { USB_ID(0x1235, 0x8208), &clarett_8pre_info }, { USB_ID(0x1235, 0x820c), &clarett_8pre_info }, /* End of list */ -- GitLab From 0f4048e1a0c6e9d3d31ce5b684600fd137cebfca Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 26 Sep 2023 15:25:32 +0200 Subject: [PATCH 1816/2290] ASoC: ti: Convert Pandora ASoC to GPIO descriptors [ Upstream commit 319e6ac143b9e9048e527ab9dd2aabb8fdf3d60f ] The Pandora uses GPIO descriptors pretty much exclusively, but not for ASoC, so let's fix it. Register the pins in a descriptor table in the machine since the ASoC device is not using device tree. Use static locals for the GPIO descriptors because I'm not able to experient with better state storage on any real hardware. Others using the Pandora can come afterwards and improve this. Signed-off-by: Linus Walleij Acked-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230926-descriptors-asoc-ti-v1-4-60cf4f8adbc5@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/pdata-quirks.c | 10 +++++ sound/soc/ti/omap3pandora.c | 63 +++++++++++------------------- 2 files changed, 33 insertions(+), 40 deletions(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 9deba798cc919..44da1e14a3740 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -257,9 +257,19 @@ static struct platform_device pandora_backlight = { .id = -1, }; +static struct gpiod_lookup_table pandora_soc_audio_gpios = { + .dev_id = "soc-audio", + .table = { + GPIO_LOOKUP("gpio-112-127", 6, "dac", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-0-15", 14, "amp", GPIO_ACTIVE_HIGH), + { } + }, +}; + static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); + gpiod_add_lookup_table(&pandora_soc_audio_gpios); } #endif /* CONFIG_ARCH_OMAP3 */ diff --git a/sound/soc/ti/omap3pandora.c b/sound/soc/ti/omap3pandora.c index a287e9747c2a1..fa92ed97dfe3b 100644 --- a/sound/soc/ti/omap3pandora.c +++ b/sound/soc/ti/omap3pandora.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include @@ -21,12 +21,11 @@ #include "omap-mcbsp.h" -#define OMAP3_PANDORA_DAC_POWER_GPIO 118 -#define OMAP3_PANDORA_AMP_POWER_GPIO 14 - #define PREFIX "ASoC omap3pandora: " static struct regulator *omap3pandora_dac_reg; +static struct gpio_desc *dac_power_gpio; +static struct gpio_desc *amp_power_gpio; static int omap3pandora_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -78,9 +77,9 @@ static int omap3pandora_dac_event(struct snd_soc_dapm_widget *w, return ret; } mdelay(1); - gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 1); + gpiod_set_value(dac_power_gpio, 1); } else { - gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 0); + gpiod_set_value(dac_power_gpio, 0); mdelay(1); regulator_disable(omap3pandora_dac_reg); } @@ -92,9 +91,9 @@ static int omap3pandora_hp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { if (SND_SOC_DAPM_EVENT_ON(event)) - gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 1); + gpiod_set_value(amp_power_gpio, 1); else - gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 0); + gpiod_set_value(amp_power_gpio, 0); return 0; } @@ -229,35 +228,10 @@ static int __init omap3pandora_soc_init(void) pr_info("OMAP3 Pandora SoC init\n"); - ret = gpio_request(OMAP3_PANDORA_DAC_POWER_GPIO, "dac_power"); - if (ret) { - pr_err(PREFIX "Failed to get DAC power GPIO\n"); - return ret; - } - - ret = gpio_direction_output(OMAP3_PANDORA_DAC_POWER_GPIO, 0); - if (ret) { - pr_err(PREFIX "Failed to set DAC power GPIO direction\n"); - goto fail0; - } - - ret = gpio_request(OMAP3_PANDORA_AMP_POWER_GPIO, "amp_power"); - if (ret) { - pr_err(PREFIX "Failed to get amp power GPIO\n"); - goto fail0; - } - - ret = gpio_direction_output(OMAP3_PANDORA_AMP_POWER_GPIO, 0); - if (ret) { - pr_err(PREFIX "Failed to set amp power GPIO direction\n"); - goto fail1; - } - omap3pandora_snd_device = platform_device_alloc("soc-audio", -1); if (omap3pandora_snd_device == NULL) { pr_err(PREFIX "Platform device allocation failed\n"); - ret = -ENOMEM; - goto fail1; + return -ENOMEM; } platform_set_drvdata(omap3pandora_snd_device, &snd_soc_card_omap3pandora); @@ -268,6 +242,20 @@ static int __init omap3pandora_soc_init(void) goto fail2; } + dac_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, + "dac", GPIOD_OUT_LOW); + if (IS_ERR(dac_power_gpio)) { + ret = PTR_ERR(dac_power_gpio); + goto fail3; + } + + amp_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, + "amp", GPIOD_OUT_LOW); + if (IS_ERR(amp_power_gpio)) { + ret = PTR_ERR(amp_power_gpio); + goto fail3; + } + omap3pandora_dac_reg = regulator_get(&omap3pandora_snd_device->dev, "vcc"); if (IS_ERR(omap3pandora_dac_reg)) { pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", @@ -283,10 +271,7 @@ fail3: platform_device_del(omap3pandora_snd_device); fail2: platform_device_put(omap3pandora_snd_device); -fail1: - gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); -fail0: - gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); + return ret; } module_init(omap3pandora_soc_init); @@ -295,8 +280,6 @@ static void __exit omap3pandora_soc_exit(void) { regulator_put(omap3pandora_dac_reg); platform_device_unregister(omap3pandora_snd_device); - gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); - gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); } module_exit(omap3pandora_soc_exit); -- GitLab From b928cde9c07cf031a9a5bffbfb94048f340d34ee Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:01:57 +0930 Subject: [PATCH 1817/2290] ALSA: scarlett2: Default mixer driver to enabled [ Upstream commit bc83058f598757a908b30f8f536338cb1478ab5b ] Early versions of this mixer driver did not work on all hardware, so out of caution the driver was disabled by default and had to be explicitly enabled with device_setup=1. Since commit 764fa6e686e0 ("ALSA: usb-audio: scarlett2: Fix device hang with ehci-pci") no more problems of this nature have been reported. Therefore, enable the driver by default but provide a new device_setup option to disable the driver in case that is needed. - device_setup value of 0 now means "enable" rather than "disable". - device_setup value of 1 is now ignored. - device_setup value of 4 now means "disable". Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/89600a35b40307f2766578ad1ca2f21801286b58.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Stable-dep-of: b61a3acada00 ("ALSA: scarlett2: Add Focusrite Clarett+ 2Pre and 4Pre support") Signed-off-by: Sasha Levin --- sound/usb/mixer_scarlett_gen2.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index f949d22da382d..2bd46fe91394d 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -145,12 +145,12 @@ #include "mixer_scarlett_gen2.h" -/* device_setup value to enable */ -#define SCARLETT2_ENABLE 0x01 - /* device_setup value to allow turning MSD mode back on */ #define SCARLETT2_MSD_ENABLE 0x02 +/* device_setup value to disable this mixer driver */ +#define SCARLETT2_DISABLE 0x04 + /* some gui mixers can't handle negative ctl values */ #define SCARLETT2_VOLUME_BIAS 127 @@ -4237,19 +4237,20 @@ int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) if (!mixer->protocol) return 0; - if (!(chip->setup & SCARLETT2_ENABLE)) { + if (chip->setup & SCARLETT2_DISABLE) { usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver disabled; " - "use options snd_usb_audio vid=0x%04x pid=0x%04x " - "device_setup=1 to enable and report any issues " - "to g@b4.vu", + "Focusrite Scarlett Gen 2/3 Mixer Driver disabled " + "by modprobe options (snd_usb_audio " + "vid=0x%04x pid=0x%04x device_setup=%d)\n", USB_ID_VENDOR(chip->usb_id), - USB_ID_PRODUCT(chip->usb_id)); + USB_ID_PRODUCT(chip->usb_id), + SCARLETT2_DISABLE); return 0; } usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver enabled pid=0x%04x", + "Focusrite Scarlett Gen 2/3 Mixer Driver enabled (pid=0x%04x); " + "report any issues to g@b4.vu", USB_ID_PRODUCT(chip->usb_id)); err = snd_scarlett_gen2_controls_create(mixer); -- GitLab From 7c02a4a6ccea9a2ab98a07ab4a5f9743877deccd Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:03:03 +0930 Subject: [PATCH 1818/2290] ALSA: scarlett2: Add correct product series name to messages [ Upstream commit 6e743781d62e28f5fa095e5f31f878819622c143 ] This driver was originally developed for the Focusrite Scarlett Gen 2 series, but now also supports the Scarlett Gen 3 series, the Clarett 8Pre USB, and the Clarett+ 8Pre. The messages output by the driver on initialisation and error include the identifying text "Scarlett Gen 2/3", but this is no longer accurate, and writing "Scarlett Gen 2/3/Clarett USB/Clarett+" would be unwieldy. Add series_name field to the scarlett2_device_entry struct so that concise and accurate messages can be output. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/3774b9d35bf1fbdd6fdad9f3f4f97e9b82ac76bf.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Stable-dep-of: b61a3acada00 ("ALSA: scarlett2: Add Focusrite Clarett+ 2Pre and 4Pre support") Signed-off-by: Sasha Levin --- sound/usb/mixer_scarlett_gen2.c | 81 ++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 2bd46fe91394d..328a593aceaa9 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -391,6 +391,7 @@ struct scarlett2_data { struct mutex data_mutex; /* lock access to this data */ struct delayed_work work; const struct scarlett2_device_info *info; + const char *series_name; __u8 bInterfaceNumber; __u8 bEndpointAddress; __u16 wMaxPacketSize; @@ -887,25 +888,26 @@ static const struct scarlett2_device_info clarett_8pre_info = { struct scarlett2_device_entry { const u32 usb_id; /* USB device identifier */ const struct scarlett2_device_info *info; + const char *series_name; }; static const struct scarlett2_device_entry scarlett2_devices[] = { /* Supported Gen 2 devices */ - { USB_ID(0x1235, 0x8203), &s6i6_gen2_info }, - { USB_ID(0x1235, 0x8204), &s18i8_gen2_info }, - { USB_ID(0x1235, 0x8201), &s18i20_gen2_info }, + { USB_ID(0x1235, 0x8203), &s6i6_gen2_info, "Scarlett Gen 2" }, + { USB_ID(0x1235, 0x8204), &s18i8_gen2_info, "Scarlett Gen 2" }, + { USB_ID(0x1235, 0x8201), &s18i20_gen2_info, "Scarlett Gen 2" }, /* Supported Gen 3 devices */ - { USB_ID(0x1235, 0x8211), &solo_gen3_info }, - { USB_ID(0x1235, 0x8210), &s2i2_gen3_info }, - { USB_ID(0x1235, 0x8212), &s4i4_gen3_info }, - { USB_ID(0x1235, 0x8213), &s8i6_gen3_info }, - { USB_ID(0x1235, 0x8214), &s18i8_gen3_info }, - { USB_ID(0x1235, 0x8215), &s18i20_gen3_info }, + { USB_ID(0x1235, 0x8211), &solo_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8210), &s2i2_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8212), &s4i4_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8213), &s8i6_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8214), &s18i8_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8215), &s18i20_gen3_info, "Scarlett Gen 3" }, /* Supported Clarett USB/Clarett+ devices */ - { USB_ID(0x1235, 0x8208), &clarett_8pre_info }, - { USB_ID(0x1235, 0x820c), &clarett_8pre_info }, + { USB_ID(0x1235, 0x8208), &clarett_8pre_info, "Clarett USB" }, + { USB_ID(0x1235, 0x820c), &clarett_8pre_info, "Clarett+" }, /* End of list */ { 0, NULL }, @@ -1205,8 +1207,8 @@ static int scarlett2_usb( if (err != req_buf_size) { usb_audio_err( mixer->chip, - "Scarlett Gen 2/3 USB request result cmd %x was %d\n", - cmd, err); + "%s USB request result cmd %x was %d\n", + private->series_name, cmd, err); err = -EINVAL; goto unlock; } @@ -1222,9 +1224,8 @@ static int scarlett2_usb( if (err != resp_buf_size) { usb_audio_err( mixer->chip, - "Scarlett Gen 2/3 USB response result cmd %x was %d " - "expected %zu\n", - cmd, err, resp_buf_size); + "%s USB response result cmd %x was %d expected %zu\n", + private->series_name, cmd, err, resp_buf_size); err = -EINVAL; goto unlock; } @@ -1240,9 +1241,10 @@ static int scarlett2_usb( resp->pad) { usb_audio_err( mixer->chip, - "Scarlett Gen 2/3 USB invalid response; " + "%s USB invalid response; " "cmd tx/rx %d/%d seq %d/%d size %d/%d " "error %d pad %d\n", + private->series_name, le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd), le16_to_cpu(req->seq), le16_to_cpu(resp->seq), resp_size, le16_to_cpu(resp->size), @@ -3798,7 +3800,7 @@ static int scarlett2_find_fc_interface(struct usb_device *dev, /* Initialise private data */ static int scarlett2_init_private(struct usb_mixer_interface *mixer, - const struct scarlett2_device_info *info) + const struct scarlett2_device_entry *entry) { struct scarlett2_data *private = kzalloc(sizeof(struct scarlett2_data), GFP_KERNEL); @@ -3814,7 +3816,8 @@ static int scarlett2_init_private(struct usb_mixer_interface *mixer, mixer->private_free = scarlett2_private_free; mixer->private_suspend = scarlett2_private_suspend; - private->info = info; + private->info = entry->info; + private->series_name = entry->series_name; scarlett2_count_mux_io(private); private->scarlett2_seq = 0; private->mixer = mixer; @@ -4135,19 +4138,28 @@ static int scarlett2_init_notify(struct usb_mixer_interface *mixer) return usb_submit_urb(mixer->urb, GFP_KERNEL); } -static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) +static const struct scarlett2_device_entry *get_scarlett2_device_entry( + struct usb_mixer_interface *mixer) { const struct scarlett2_device_entry *entry = scarlett2_devices; - int err; /* Find entry in scarlett2_devices */ while (entry->usb_id && entry->usb_id != mixer->chip->usb_id) entry++; if (!entry->usb_id) - return -EINVAL; + return NULL; + + return entry; +} + +static int snd_scarlett_gen2_controls_create( + struct usb_mixer_interface *mixer, + const struct scarlett2_device_entry *entry) +{ + int err; /* Initialise private data */ - err = scarlett2_init_private(mixer, entry->info); + err = scarlett2_init_private(mixer, entry); if (err < 0) return err; @@ -4231,17 +4243,30 @@ static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) { struct snd_usb_audio *chip = mixer->chip; + const struct scarlett2_device_entry *entry; int err; /* only use UAC_VERSION_2 */ if (!mixer->protocol) return 0; + /* find entry in scarlett2_devices */ + entry = get_scarlett2_device_entry(mixer); + if (!entry) { + usb_audio_err(mixer->chip, + "%s: missing device entry for %04x:%04x\n", + __func__, + USB_ID_VENDOR(chip->usb_id), + USB_ID_PRODUCT(chip->usb_id)); + return 0; + } + if (chip->setup & SCARLETT2_DISABLE) { usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver disabled " + "Focusrite %s Mixer Driver disabled " "by modprobe options (snd_usb_audio " "vid=0x%04x pid=0x%04x device_setup=%d)\n", + entry->series_name, USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id), SCARLETT2_DISABLE); @@ -4249,14 +4274,16 @@ int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) } usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver enabled (pid=0x%04x); " + "Focusrite %s Mixer Driver enabled (pid=0x%04x); " "report any issues to g@b4.vu", + entry->series_name, USB_ID_PRODUCT(chip->usb_id)); - err = snd_scarlett_gen2_controls_create(mixer); + err = snd_scarlett_gen2_controls_create(mixer, entry); if (err < 0) usb_audio_err(mixer->chip, - "Error initialising Scarlett Mixer Driver: %d", + "Error initialising %s Mixer Driver: %d", + entry->series_name, err); return err; -- GitLab From 03bfe0e93613fbd9039f16b313b918aeb395d704 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 27 Sep 2023 01:11:30 +0930 Subject: [PATCH 1819/2290] ALSA: scarlett2: Add Focusrite Clarett+ 2Pre and 4Pre support [ Upstream commit b61a3acada0031e7a4922d1340b4296ab95c260b ] The Focusrite Clarett+ series uses the same protocol as the Scarlett Gen 2 and Gen 3 series. This patch adds support for the Clarett+ 2Pre and Clarett+ 4Pre similarly to the existing 8Pre support by adding appropriate entries to the scarlett2 driver. The Clarett 2Pre USB and 4Pre USB presumably use the same protocol as well, so support for them can easily be added if someone can test. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/ZRL7qjC3tYQllT3H@m.b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 2 + sound/usb/mixer_scarlett_gen2.c | 97 ++++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index b122d7aedb443..3721d59a56809 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3448,6 +3448,8 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8214): /* Focusrite Scarlett 18i8 3rd Gen */ case USB_ID(0x1235, 0x8215): /* Focusrite Scarlett 18i20 3rd Gen */ case USB_ID(0x1235, 0x8208): /* Focusrite Clarett 8Pre USB */ + case USB_ID(0x1235, 0x820a): /* Focusrite Clarett+ 2Pre */ + case USB_ID(0x1235, 0x820b): /* Focusrite Clarett+ 4Pre */ case USB_ID(0x1235, 0x820c): /* Focusrite Clarett+ 8Pre */ err = snd_scarlett_gen2_init(mixer); break; diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 328a593aceaa9..e6088fdafe7a3 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -6,7 +6,7 @@ * - 6i6/18i8/18i20 Gen 2 * - Solo/2i2/4i4/8i6/18i8/18i20 Gen 3 * - Clarett 8Pre USB - * - Clarett+ 8Pre + * - Clarett+ 2Pre/4Pre/8Pre * * Copyright (c) 2018-2023 by Geoffrey D. Bennett * Copyright (c) 2020-2021 by Vladimir Sadovnikov @@ -60,6 +60,10 @@ * Support for Clarett 8Pre USB added in Sep 2023 (thanks to Philippe * Perrot for confirmation). * + * Support for Clarett+ 4Pre and 2Pre added in Sep 2023 (thanks to + * Gregory Rozzo for donating a 4Pre, and David Sherwood and Patrice + * Peterson for usbmon output). + * * This ALSA mixer gives access to (model-dependent): * - input, output, mixer-matrix muxes * - mixer-matrix gain stages @@ -832,6 +836,95 @@ static const struct scarlett2_device_info s18i20_gen3_info = { } }, }; +static const struct scarlett2_device_info clarett_2pre_info = { + .config_set = SCARLETT2_CONFIG_SET_CLARETT, + .line_out_hw_vol = 1, + .level_input_count = 2, + .air_input_count = 2, + + .line_out_descrs = { + "Monitor L", + "Monitor R", + "Headphones L", + "Headphones R", + }, + + .port_count = { + [SCARLETT2_PORT_TYPE_NONE] = { 1, 0 }, + [SCARLETT2_PORT_TYPE_ANALOGUE] = { 2, 4 }, + [SCARLETT2_PORT_TYPE_SPDIF] = { 2, 0 }, + [SCARLETT2_PORT_TYPE_ADAT] = { 8, 0 }, + [SCARLETT2_PORT_TYPE_MIX] = { 10, 18 }, + [SCARLETT2_PORT_TYPE_PCM] = { 4, 12 }, + }, + + .mux_assignment = { { + { SCARLETT2_PORT_TYPE_PCM, 0, 12 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 4 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 8 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 4 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 2 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 4 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 26 }, + { 0, 0, 0 }, + } }, +}; + +static const struct scarlett2_device_info clarett_4pre_info = { + .config_set = SCARLETT2_CONFIG_SET_CLARETT, + .line_out_hw_vol = 1, + .level_input_count = 2, + .air_input_count = 4, + + .line_out_descrs = { + "Monitor L", + "Monitor R", + "Headphones 1 L", + "Headphones 1 R", + "Headphones 2 L", + "Headphones 2 R", + }, + + .port_count = { + [SCARLETT2_PORT_TYPE_NONE] = { 1, 0 }, + [SCARLETT2_PORT_TYPE_ANALOGUE] = { 8, 6 }, + [SCARLETT2_PORT_TYPE_SPDIF] = { 2, 2 }, + [SCARLETT2_PORT_TYPE_ADAT] = { 8, 0 }, + [SCARLETT2_PORT_TYPE_MIX] = { 10, 18 }, + [SCARLETT2_PORT_TYPE_PCM] = { 8, 18 }, + }, + + .mux_assignment = { { + { SCARLETT2_PORT_TYPE_PCM, 0, 18 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 6 }, + { SCARLETT2_PORT_TYPE_SPDIF, 0, 2 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 14 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 6 }, + { SCARLETT2_PORT_TYPE_SPDIF, 0, 2 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 12 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 6 }, + { SCARLETT2_PORT_TYPE_SPDIF, 0, 2 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 24 }, + { 0, 0, 0 }, + } }, +}; + static const struct scarlett2_device_info clarett_8pre_info = { .config_set = SCARLETT2_CONFIG_SET_CLARETT, .line_out_hw_vol = 1, @@ -907,6 +1000,8 @@ static const struct scarlett2_device_entry scarlett2_devices[] = { /* Supported Clarett USB/Clarett+ devices */ { USB_ID(0x1235, 0x8208), &clarett_8pre_info, "Clarett USB" }, + { USB_ID(0x1235, 0x820a), &clarett_2pre_info, "Clarett+" }, + { USB_ID(0x1235, 0x820b), &clarett_4pre_info, "Clarett+" }, { USB_ID(0x1235, 0x820c), &clarett_8pre_info, "Clarett+" }, /* End of list */ -- GitLab From aedbd0961467843ff4276db5f484acd78ff54974 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sat, 7 Oct 2023 22:03:04 +1030 Subject: [PATCH 1820/2290] ALSA: scarlett2: Add Focusrite Clarett 2Pre and 4Pre USB support [ Upstream commit 2b17b489e47a956c8e93c8f1bcabb0343c851d90 ] It has been confirmed that all devices in the Focusrite Clarett USB series work the same as the devices in the Clarett+ series. Add the missing PIDs to enable support for the Clarett 2Pre and 4Pre USB. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/ZSFB8EVTG1PK1eq/@m.b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 2 ++ sound/usb/mixer_scarlett_gen2.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 3721d59a56809..a331732fed890 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3447,6 +3447,8 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8213): /* Focusrite Scarlett 8i6 3rd Gen */ case USB_ID(0x1235, 0x8214): /* Focusrite Scarlett 18i8 3rd Gen */ case USB_ID(0x1235, 0x8215): /* Focusrite Scarlett 18i20 3rd Gen */ + case USB_ID(0x1235, 0x8206): /* Focusrite Clarett 2Pre USB */ + case USB_ID(0x1235, 0x8207): /* Focusrite Clarett 4Pre USB */ case USB_ID(0x1235, 0x8208): /* Focusrite Clarett 8Pre USB */ case USB_ID(0x1235, 0x820a): /* Focusrite Clarett+ 2Pre */ case USB_ID(0x1235, 0x820b): /* Focusrite Clarett+ 4Pre */ diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index e6088fdafe7a3..cbdef89ab987f 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -5,7 +5,7 @@ * Supported models: * - 6i6/18i8/18i20 Gen 2 * - Solo/2i2/4i4/8i6/18i8/18i20 Gen 3 - * - Clarett 8Pre USB + * - Clarett 2Pre/4Pre/8Pre USB * - Clarett+ 2Pre/4Pre/8Pre * * Copyright (c) 2018-2023 by Geoffrey D. Bennett @@ -64,6 +64,8 @@ * Gregory Rozzo for donating a 4Pre, and David Sherwood and Patrice * Peterson for usbmon output). * + * Support for Clarett 2Pre and 4Pre USB added in Oct 2023. + * * This ALSA mixer gives access to (model-dependent): * - input, output, mixer-matrix muxes * - mixer-matrix gain stages @@ -999,6 +1001,8 @@ static const struct scarlett2_device_entry scarlett2_devices[] = { { USB_ID(0x1235, 0x8215), &s18i20_gen3_info, "Scarlett Gen 3" }, /* Supported Clarett USB/Clarett+ devices */ + { USB_ID(0x1235, 0x8206), &clarett_2pre_info, "Clarett USB" }, + { USB_ID(0x1235, 0x8207), &clarett_4pre_info, "Clarett USB" }, { USB_ID(0x1235, 0x8208), &clarett_8pre_info, "Clarett USB" }, { USB_ID(0x1235, 0x820a), &clarett_2pre_info, "Clarett+" }, { USB_ID(0x1235, 0x820b), &clarett_4pre_info, "Clarett+" }, @@ -1197,7 +1201,7 @@ static const struct scarlett2_config [SCARLETT2_CONFIG_TALKBACK_MAP] = { .offset = 0xb0, .size = 16, .activate = 10 }, -/* Clarett+ 8Pre */ +/* Clarett USB and Clarett+ devices: 2Pre, 4Pre, 8Pre */ }, { [SCARLETT2_CONFIG_DIM_MUTE] = { .offset = 0x31, .size = 8, .activate = 2 }, -- GitLab From 636f0fdb3623848deb5b738a78e56e7cf0ec309f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 18 Oct 2023 14:32:51 +0300 Subject: [PATCH 1821/2290] PCI/DPC: Use FIELD_GET() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9a9eec4765737b9b2a8d6ae03de6480a5f12dd5c ] Use FIELD_GET() to remove dependencies on the field position, i.e., the shift value. No functional change intended. Link: https://lore.kernel.org/r/20231018113254.17616-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/dpc.c | 5 +++-- drivers/pci/quirks.c | 2 +- include/uapi/linux/pci_regs.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 08800282825e1..acdbf9e770a8a 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -9,6 +9,7 @@ #define dev_fmt(fmt) "DPC: " fmt #include +#include #include #include #include @@ -203,7 +204,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) /* Get First Error Pointer */ pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status); - first_error = (dpc_status & 0x1f00) >> 8; + first_error = FIELD_GET(PCI_EXP_DPC_RP_PIO_FEP, dpc_status); for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) { if ((status & ~mask) & (1 << i)) @@ -339,7 +340,7 @@ void pci_dpc_init(struct pci_dev *pdev) /* Quirks may set dpc_rp_log_size if device or firmware is buggy */ if (!pdev->dpc_rp_log_size) { pdev->dpc_rp_log_size = - (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap); if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { pci_err(pdev, "RP PIO log size %u is invalid\n", pdev->dpc_rp_log_size); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 47099d00fcf1d..1131e353d8c9a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6103,7 +6103,7 @@ static void dpc_log_size(struct pci_dev *dev) if (!(val & PCI_EXP_DPC_CAP_RP_EXT)) return; - if (!((val & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8)) { + if (FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, val) == 0) { pci_info(dev, "Overriding RP PIO Log Size to 4\n"); dev->dpc_rp_log_size = 4; } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 57b8e2ffb1dd3..3325155036c80 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1043,6 +1043,7 @@ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ +#define PCI_EXP_DPC_RP_PIO_FEP 0x1f00 /* RP PIO First Err Ptr */ #define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ -- GitLab From 39f932d2953bdf17361b64cdfc0c05c0b2487eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 26 Oct 2023 15:19:23 +0300 Subject: [PATCH 1822/2290] PCI: Simplify pcie_capability_clear_and_set_word() to ..._clear_word() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0fce6e5c87faec2c8bf28d2abc8cb595f4e244b6 ] When using pcie_capability_clear_and_set_word() but not actually *setting* anything, use pcie_capability_clear_word() instead. Link: https://lore.kernel.org/r/20231026121924.2164-1-ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/r/20231026121924.2164-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: squash] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 8 ++++---- drivers/pci/quirks.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 25736d408e88e..2a3d973658dac 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -743,10 +743,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) * in pcie_config_aspm_link(). */ if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) { - pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1, 0); - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1, 0); + pcie_capability_clear_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1); + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1); } val = 0; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1131e353d8c9a..56dce858a6934 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4476,9 +4476,9 @@ static void quirk_disable_root_port_attributes(struct pci_dev *pdev) pci_info(root_port, "Disabling No Snoop/Relaxed Ordering Attributes to avoid PCIe Completion erratum in %s\n", dev_name(&pdev->dev)); - pcie_capability_clear_and_set_word(root_port, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_RELAX_EN | - PCI_EXP_DEVCTL_NOSNOOP_EN, 0); + pcie_capability_clear_word(root_port, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN | + PCI_EXP_DEVCTL_NOSNOOP_EN); } /* -- GitLab From 7989b04d6cc4f1d83c696d30e8c9d27a4928e28c Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 27 Oct 2023 04:31:28 +1030 Subject: [PATCH 1823/2290] ALSA: scarlett2: Rename scarlett_gen2 to scarlett2 [ Upstream commit efc3d7d20361cc59325a9f0525e079333b4459c0 ] This driver was originally developed for the Focusrite Scarlett Gen 2 series. Since then Focusrite have used a similar protocol for their Gen 3, Gen 4, Clarett USB, Clarett+, and Vocaster series. Let's call this common protocol the "Scarlett 2 Protocol" and rename the driver to scarlett2 to not imply that it is restricted to Gen 2 series devices. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/e1ad7f69a1e20cdb39094164504389160c1a0a0b.1698342632.git.g@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- MAINTAINERS | 2 +- sound/usb/Makefile | 2 +- sound/usb/mixer_quirks.c | 4 ++-- .../usb/{mixer_scarlett_gen2.c => mixer_scarlett2.c} | 12 +++++++----- sound/usb/mixer_scarlett2.h | 7 +++++++ sound/usb/mixer_scarlett_gen2.h | 7 ------- 6 files changed, 18 insertions(+), 16 deletions(-) rename sound/usb/{mixer_scarlett_gen2.c => mixer_scarlett2.c} (99%) create mode 100644 sound/usb/mixer_scarlett2.h delete mode 100644 sound/usb/mixer_scarlett_gen2.h diff --git a/MAINTAINERS b/MAINTAINERS index bbfedb0b20938..ecf4d0c8f446e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8031,7 +8031,7 @@ M: Geoffrey D. Bennett L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git -F: sound/usb/mixer_scarlett_gen2.c +F: sound/usb/mixer_scarlett2.c FORCEDETH GIGABIT ETHERNET DRIVER M: Rain River diff --git a/sound/usb/Makefile b/sound/usb/Makefile index 9ccb21a4ff8a8..64a718c766a7a 100644 --- a/sound/usb/Makefile +++ b/sound/usb/Makefile @@ -12,7 +12,7 @@ snd-usb-audio-objs := card.o \ mixer.o \ mixer_quirks.o \ mixer_scarlett.o \ - mixer_scarlett_gen2.o \ + mixer_scarlett2.o \ mixer_us16x08.o \ mixer_s1810c.o \ pcm.o \ diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index a331732fed890..c8d48566e1759 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -33,7 +33,7 @@ #include "mixer.h" #include "mixer_quirks.h" #include "mixer_scarlett.h" -#include "mixer_scarlett_gen2.h" +#include "mixer_scarlett2.h" #include "mixer_us16x08.h" #include "mixer_s1810c.h" #include "helper.h" @@ -3453,7 +3453,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x820a): /* Focusrite Clarett+ 2Pre */ case USB_ID(0x1235, 0x820b): /* Focusrite Clarett+ 4Pre */ case USB_ID(0x1235, 0x820c): /* Focusrite Clarett+ 8Pre */ - err = snd_scarlett_gen2_init(mixer); + err = snd_scarlett2_init(mixer); break; case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */ diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett2.c similarity index 99% rename from sound/usb/mixer_scarlett_gen2.c rename to sound/usb/mixer_scarlett2.c index cbdef89ab987f..bcb8b76174065 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett2.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Focusrite Scarlett Gen 2/3 and Clarett USB/Clarett+ Driver for ALSA + * Focusrite Scarlett 2 Protocol Driver for ALSA + * (including Scarlett 2nd Gen, 3rd Gen, Clarett USB, and Clarett+ + * series products) * * Supported models: * - 6i6/18i8/18i20 Gen 2 @@ -149,7 +151,7 @@ #include "mixer.h" #include "helper.h" -#include "mixer_scarlett_gen2.h" +#include "mixer_scarlett2.h" /* device_setup value to allow turning MSD mode back on */ #define SCARLETT2_MSD_ENABLE 0x02 @@ -4251,7 +4253,7 @@ static const struct scarlett2_device_entry *get_scarlett2_device_entry( return entry; } -static int snd_scarlett_gen2_controls_create( +static int snd_scarlett2_controls_create( struct usb_mixer_interface *mixer, const struct scarlett2_device_entry *entry) { @@ -4339,7 +4341,7 @@ static int snd_scarlett_gen2_controls_create( return 0; } -int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) +int snd_scarlett2_init(struct usb_mixer_interface *mixer) { struct snd_usb_audio *chip = mixer->chip; const struct scarlett2_device_entry *entry; @@ -4378,7 +4380,7 @@ int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) entry->series_name, USB_ID_PRODUCT(chip->usb_id)); - err = snd_scarlett_gen2_controls_create(mixer, entry); + err = snd_scarlett2_controls_create(mixer, entry); if (err < 0) usb_audio_err(mixer->chip, "Error initialising %s Mixer Driver: %d", diff --git a/sound/usb/mixer_scarlett2.h b/sound/usb/mixer_scarlett2.h new file mode 100644 index 0000000000000..d209362cf41a6 --- /dev/null +++ b/sound/usb/mixer_scarlett2.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __USB_MIXER_SCARLETT2_H +#define __USB_MIXER_SCARLETT2_H + +int snd_scarlett2_init(struct usb_mixer_interface *mixer); + +#endif /* __USB_MIXER_SCARLETT2_H */ diff --git a/sound/usb/mixer_scarlett_gen2.h b/sound/usb/mixer_scarlett_gen2.h deleted file mode 100644 index 668c6b0cb50a6..0000000000000 --- a/sound/usb/mixer_scarlett_gen2.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __USB_MIXER_SCARLETT_GEN2_H -#define __USB_MIXER_SCARLETT_GEN2_H - -int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer); - -#endif /* __USB_MIXER_SCARLETT_GEN2_H */ -- GitLab From 1d011d972f588f94e29773791abde6da3fa423be Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 14 Nov 2023 23:38:59 +0000 Subject: [PATCH 1824/2290] drm: panel-orientation-quirks: Add quirk for Lenovo Legion Go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 430143b0d3611f4a9c8434319e5e504244749e79 ] The Legion Go has a 2560x1600 portrait screen, with the native "up" facing the right controller (90° CW from the rest of the device). Signed-off-by: Brenton Simpson Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231114233859.274189-1-appsforartists@google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3fe5e6439c401..aa93129c3397e 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -348,6 +348,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Legion Go 8APU1 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, }, { /* Lenovo Yoga Book X90F / X90L */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), -- GitLab From 5f9b63193bcac6938298ec8f079e533da6db5e86 Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Fri, 27 Oct 2023 17:20:28 +0200 Subject: [PATCH 1825/2290] usb: xhci: Add timeout argument in address_device USB HCD callback [ Upstream commit a769154c7cac037914ba375ae88aae55b2c853e0 ] - The HCD address_device callback now accepts a user-defined timeout value in milliseconds, providing better control over command execution times. - The default timeout value for the address_device command has been set to 5000 ms, aligning with the USB 3.2 specification. However, this timeout can be adjusted as needed. - The xhci_setup_device function has been updated to accept the timeout value, allowing it to specify the maximum wait time for the command operation to complete. - The hub driver has also been updated to accommodate the newly added timeout parameter during the SET_ADDRESS request. Signed-off-by: Hardik Gajjar Reviewed-by: Mathias Nyman Link: https://lore.kernel.org/r/20231027152029.104363-1-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 5a1ccf0c72cf ("usb: new quirk to reduce the SET_ADDRESS request timeout") Signed-off-by: Sasha Levin --- drivers/usb/core/hub.c | 2 +- drivers/usb/host/xhci-mem.c | 2 ++ drivers/usb/host/xhci-ring.c | 11 ++++++----- drivers/usb/host/xhci.c | 23 ++++++++++++++++------- drivers/usb/host/xhci.h | 9 +++++++-- include/linux/usb/hcd.h | 5 +++-- 6 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b1fb04e5247c3..a661f6ac1ad14 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4661,7 +4661,7 @@ static int hub_set_address(struct usb_device *udev, int devnum) if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) - retval = hcd->driver->address_device(hcd, udev); + retval = hcd->driver->address_device(hcd, udev, USB_CTRL_SET_TIMEOUT); else retval = usb_control_msg(udev, usb_sndaddr0pipe(), USB_REQ_SET_ADDRESS, 0, devnum, 0, diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 019dcbe55dbdc..62808c98713ec 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1752,6 +1752,8 @@ struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci, } command->status = 0; + /* set default timeout to 5000 ms */ + command->timeout_ms = XHCI_CMD_DEFAULT_TIMEOUT; INIT_LIST_HEAD(&command->cmd_list); return command; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 239b5edee3268..4a039e42694bc 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -332,9 +332,10 @@ void xhci_ring_cmd_db(struct xhci_hcd *xhci) readl(&xhci->dba->doorbell[0]); } -static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) +static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci) { - return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); + return mod_delayed_work(system_wq, &xhci->cmd_timer, + msecs_to_jiffies(xhci->current_cmd->timeout_ms)); } static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci) @@ -378,7 +379,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && !(xhci->xhc_state & XHCI_STATE_DYING)) { xhci->current_cmd = cur_cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci); xhci_ring_cmd_db(xhci); } } @@ -1762,7 +1763,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, if (!list_is_singular(&xhci->cmd_list)) { xhci->current_cmd = list_first_entry(&cmd->cmd_list, struct xhci_command, cmd_list); - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci); } else if (xhci->current_cmd == cmd) { xhci->current_cmd = NULL; } @@ -4339,7 +4340,7 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, /* if there are no other commands queued we start the timeout timer */ if (list_empty(&xhci->cmd_list)) { xhci->current_cmd = cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci); } list_add_tail(&cmd->cmd_list, &xhci->cmd_list); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 565aba6b99860..27e01671d3865 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4170,12 +4170,18 @@ disable_slot: return 0; } -/* - * Issue an Address Device command and optionally send a corresponding - * SetAddress request to the device. +/** + * xhci_setup_device - issues an Address Device command to assign a unique + * USB bus address. + * @hcd: USB host controller data structure. + * @udev: USB dev structure representing the connected device. + * @setup: Enum specifying setup mode: address only or with context. + * @timeout_ms: Max wait time (ms) for the command operation to complete. + * + * Return: 0 if successful; otherwise, negative error code. */ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, - enum xhci_setup_dev setup) + enum xhci_setup_dev setup, unsigned int timeout_ms) { const char *act = setup == SETUP_CONTEXT_ONLY ? "context" : "address"; unsigned long flags; @@ -4232,6 +4238,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, } command->in_ctx = virt_dev->in_ctx; + command->timeout_ms = timeout_ms; slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->in_ctx); ctrl_ctx = xhci_get_input_control_ctx(virt_dev->in_ctx); @@ -4358,14 +4365,16 @@ out: return ret; } -static int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev, + unsigned int timeout_ms) { - return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS); + return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS, timeout_ms); } static int xhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) { - return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY); + return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY, + XHCI_CMD_DEFAULT_TIMEOUT); } /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index fc25a5b09710c..fa9e87141e0bf 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -815,6 +815,8 @@ struct xhci_command { struct completion *completion; union xhci_trb *command_trb; struct list_head cmd_list; + /* xHCI command response timeout in milliseconds */ + unsigned int timeout_ms; }; /* drop context bitmasks */ @@ -1574,8 +1576,11 @@ struct xhci_td { unsigned int num_trbs; }; -/* xHCI command default timeout value */ -#define XHCI_CMD_DEFAULT_TIMEOUT (5 * HZ) +/* + * xHCI command default timeout value in milliseconds. + * USB 3.2 spec, section 9.2.6.1 + */ +#define XHCI_CMD_DEFAULT_TIMEOUT 5000 /* command descriptor */ struct xhci_cd { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 5a89928ea9534..cd667acf62672 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -371,8 +371,9 @@ struct hc_driver { * or bandwidth constraints. */ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); - /* Returns the hardware-chosen device address */ - int (*address_device)(struct usb_hcd *, struct usb_device *udev); + /* Set the hardware-chosen device address */ + int (*address_device)(struct usb_hcd *, struct usb_device *udev, + unsigned int timeout_ms); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. -- GitLab From f6ac4fdfa519ba66a7beb1ba4077f1a7319aecea Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Fri, 27 Oct 2023 17:20:29 +0200 Subject: [PATCH 1826/2290] usb: new quirk to reduce the SET_ADDRESS request timeout [ Upstream commit 5a1ccf0c72cf917ff3ccc131d1bb8d19338ffe52 ] This patch introduces a new USB quirk, USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT, which modifies the timeout value for the SET_ADDRESS request. The standard timeout for USB request/command is 5000 ms, as recommended in the USB 3.2 specification (section 9.2.6.1). However, certain scenarios, such as connecting devices through an APTIV hub, can lead to timeout errors when the device enumerates as full speed initially and later switches to high speed during chirp negotiation. In such cases, USB analyzer logs reveal that the bus suspends for 5 seconds due to incorrect chirp parsing and resumes only after two consecutive timeout errors trigger a hub driver reset. Packet(54) Dir(?) Full Speed J(997.100 us) Idle( 2.850 us) _______| Time Stamp(28 . 105 910 682) _______|_____________________________________________________________Ch0 Packet(55) Dir(?) Full Speed J(997.118 us) Idle( 2.850 us) _______| Time Stamp(28 . 106 910 632) _______|_____________________________________________________________Ch0 Packet(56) Dir(?) Full Speed J(399.650 us) Idle(222.582 us) _______| Time Stamp(28 . 107 910 600) _______|_____________________________________________________________Ch0 Packet(57) Dir Chirp J( 23.955 ms) Idle(115.169 ms) _______| Time Stamp(28 . 108 532 832) _______|_____________________________________________________________Ch0 Packet(58) Dir(?) Full Speed J (Suspend)( 5.347 sec) Idle( 5.366 us) _______| Time Stamp(28 . 247 657 600) _______|_____________________________________________________________Ch0 This 5-second delay in device enumeration is undesirable, particularly in automotive applications where quick enumeration is crucial (ideally within 3 seconds). The newly introduced quirks provide the flexibility to align with a 3-second time limit, as required in specific contexts like automotive applications. By reducing the SET_ADDRESS request timeout to 500 ms, the system can respond more swiftly to errors, initiate rapid recovery, and ensure efficient device enumeration. This change is vital for scenarios where rapid smartphone enumeration and screen projection are essential. To use the quirk, please write "vendor_id:product_id:p" to /sys/bus/usb/drivers/hub/module/parameter/quirks For example, echo "0x2c48:0x0132:p" > /sys/bus/usb/drivers/hub/module/parameters/quirks" Signed-off-by: Hardik Gajjar Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20231027152029.104363-2-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/usb/core/hub.c | 15 +++++++++++++-- drivers/usb/core/quirks.c | 7 +++++++ include/linux/usb/quirks.h | 3 +++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index aebbe2981241a..e6f0570cf4900 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6603,6 +6603,9 @@ pause after every control message); o = USB_QUIRK_HUB_SLOW_RESET (Hub needs extra delay after resetting its port); + p = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT + (Reduce timeout of the SET_ADDRESS + request from 5000 ms to 500 ms); Example: quirks=0781:5580:bk,0a5c:5834:gij usbhid.mousepoll= diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a661f6ac1ad14..dea110241ee71 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -60,6 +60,12 @@ #define USB_PING_RESPONSE_TIME 400 /* ns */ #define USB_REDUCE_FRAME_INTR_BINTERVAL 9 +/* + * The SET_ADDRESS request timeout will be 500 ms when + * USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT quirk flag is set. + */ +#define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */ @@ -4648,7 +4654,12 @@ EXPORT_SYMBOL_GPL(usb_ep0_reinit); static int hub_set_address(struct usb_device *udev, int devnum) { int retval; + unsigned int timeout_ms = USB_CTRL_SET_TIMEOUT; struct usb_hcd *hcd = bus_to_hcd(udev->bus); + struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); + + if (hub->hdev->quirks & USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT) + timeout_ms = USB_SHORT_SET_ADDRESS_REQ_TIMEOUT; /* * The host controller will choose the device address, @@ -4661,11 +4672,11 @@ static int hub_set_address(struct usb_device *udev, int devnum) if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) - retval = hcd->driver->address_device(hcd, udev, USB_CTRL_SET_TIMEOUT); + retval = hcd->driver->address_device(hcd, udev, timeout_ms); else retval = usb_control_msg(udev, usb_sndaddr0pipe(), USB_REQ_SET_ADDRESS, 0, devnum, 0, - NULL, 0, USB_CTRL_SET_TIMEOUT); + NULL, 0, timeout_ms); if (retval == 0) { update_devnum(udev, devnum); /* Device now using proper address. */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 15e9bd180a1d2..b4783574b8e66 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -138,6 +138,9 @@ static int quirks_param_set(const char *value, const struct kernel_param *kp) case 'o': flags |= USB_QUIRK_HUB_SLOW_RESET; break; + case 'p': + flags |= USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT; + break; /* Ignore unrecognized flag characters */ } } @@ -527,6 +530,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2386, 0x350e), .driver_info = USB_QUIRK_NO_LPM }, + /* APTIV AUTOMOTIVE HUB */ + { USB_DEVICE(0x2c48, 0x0132), .driver_info = + USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT }, + /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index eeb7c2157c72f..59409c1fc3dee 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -72,4 +72,7 @@ /* device has endpoints that should be ignored */ #define USB_QUIRK_ENDPOINT_IGNORE BIT(15) +/* short SET_ADDRESS request timeout */ +#define USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT BIT(16) + #endif /* __LINUX_USB_QUIRKS_H */ -- GitLab From 349dbfd65f862baf06099e559419443c6fe09ecf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:55 -0700 Subject: [PATCH 1827/2290] clk: Remove prepare_lock hold assertion in __clk_release() [ Upstream commit 8358a76cfb47c9a5af627a0c4e7168aa14fa25f6 ] Removing this assertion lets us move the kref_put() call outside the prepare_lock section. We don't need to hold the prepare_lock here to free memory and destroy the clk_core structure. We've already unlinked the clk from the clk tree and by the time the release function runs nothing holds a reference to the clk_core anymore so anything with the pointer can't access the memory that's being freed anyway. Way back in commit 496eadf821c2 ("clk: Use lockdep asserts to find missing hold of prepare_lock") we didn't need to have this assertion either. Fixes: 496eadf821c2 ("clk: Use lockdep asserts to find missing hold of prepare_lock") Cc: Krzysztof Kozlowski Reviewed-by: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-2-sboyd@kernel.org Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 9004e07182259..ad40913d80a8b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4230,8 +4230,6 @@ static void __clk_release(struct kref *ref) { struct clk_core *core = container_of(ref, struct clk_core, ref); - lockdep_assert_held(&prepare_lock); - clk_core_free_parent_map(core); kfree_const(core->name); kfree(core); -- GitLab From 43bc4cfef2b18fad0141af7aff7703b5081aa4a5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 7 Mar 2023 14:29:28 +0100 Subject: [PATCH 1828/2290] clk: Print an info line before disabling unused clocks [ Upstream commit 12ca59b91d04df32e41be5a52f0cabba912c11de ] Currently, the regulator framework informs us before calling into their unused cleanup paths, which eases at least some debugging. The same could be beneficial for clocks, so that random shutdowns shortly after most initcalls are done can be less of a guess. Add a pr_info before disabling unused clocks to do so. Reviewed-by: Marijn Suijten Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230307132928.3887737-1-konrad.dybcio@linaro.org Signed-off-by: Stephen Boyd Stable-dep-of: e581cf5d2162 ("clk: Get runtime PM before walking tree during disable_unused") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ad40913d80a8b..d841a9d7281c6 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1388,6 +1388,8 @@ static int __init clk_disable_unused(void) return 0; } + pr_info("clk: Disabling unused clocks\n"); + clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) -- GitLab From 5558b3b68c241998c1e601172c8b0ff02c6332c1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:57 -0700 Subject: [PATCH 1829/2290] clk: Initialize struct clk_core kref earlier [ Upstream commit 9d05ae531c2cff20d5d527f04e28d28e04379929 ] Initialize this kref once we allocate memory for the struct clk_core so that we can reuse the release function to free any memory associated with the structure. This mostly consolidates code, but also clarifies that the kref lifetime exists once the container structure (struct clk_core) is allocated instead of leaving it in a half-baked state for most of __clk_core_init(). Reviewed-by: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-4-sboyd@kernel.org Stable-dep-of: e581cf5d2162 ("clk: Get runtime PM before walking tree during disable_unused") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index d841a9d7281c6..4f9f55cff9231 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3838,8 +3838,6 @@ static int __clk_core_init(struct clk_core *core) } clk_core_reparent_orphans_nolock(); - - kref_init(&core->ref); out: clk_pm_runtime_put(core); unlock: @@ -4068,6 +4066,16 @@ static void clk_core_free_parent_map(struct clk_core *core) kfree(core->parents); } +/* Free memory allocated for a struct clk_core */ +static void __clk_release(struct kref *ref) +{ + struct clk_core *core = container_of(ref, struct clk_core, ref); + + clk_core_free_parent_map(core); + kfree_const(core->name); + kfree(core); +} + static struct clk * __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) { @@ -4088,6 +4096,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_out; } + kref_init(&core->ref); + core->name = kstrdup_const(init->name, GFP_KERNEL); if (!core->name) { ret = -ENOMEM; @@ -4142,12 +4152,10 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) hw->clk = NULL; fail_create_clk: - clk_core_free_parent_map(core); fail_parents: fail_ops: - kfree_const(core->name); fail_name: - kfree(core); + kref_put(&core->ref, __clk_release); fail_out: return ERR_PTR(ret); } @@ -4227,16 +4235,6 @@ int of_clk_hw_register(struct device_node *node, struct clk_hw *hw) } EXPORT_SYMBOL_GPL(of_clk_hw_register); -/* Free memory allocated for a clock. */ -static void __clk_release(struct kref *ref) -{ - struct clk_core *core = container_of(ref, struct clk_core, ref); - - clk_core_free_parent_map(core); - kfree_const(core->name); - kfree(core); -} - /* * Empty clk_ops for unregistered clocks. These are used temporarily * after clk_unregister() was called on a clock and until last clock -- GitLab From a424e713e0cc33d4b969cfda25b9f46df4d7b5bc Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:58 -0700 Subject: [PATCH 1830/2290] clk: Get runtime PM before walking tree during disable_unused [ Upstream commit e581cf5d216289ef292d1a4036d53ce90e122469 ] Doug reported [1] the following hung task: INFO: task swapper/0:1 blocked for more than 122 seconds. Not tainted 5.15.149-21875-gf795ebc40eb8 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:swapper/0 state:D stack: 0 pid: 1 ppid: 0 flags:0x00000008 Call trace: __switch_to+0xf4/0x1f4 __schedule+0x418/0xb80 schedule+0x5c/0x10c rpm_resume+0xe0/0x52c rpm_resume+0x178/0x52c __pm_runtime_resume+0x58/0x98 clk_pm_runtime_get+0x30/0xb0 clk_disable_unused_subtree+0x58/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused+0x4c/0xe4 do_one_initcall+0xcc/0x2d8 do_initcall_level+0xa4/0x148 do_initcalls+0x5c/0x9c do_basic_setup+0x24/0x30 kernel_init_freeable+0xec/0x164 kernel_init+0x28/0x120 ret_from_fork+0x10/0x20 INFO: task kworker/u16:0:9 blocked for more than 122 seconds. Not tainted 5.15.149-21875-gf795ebc40eb8 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u16:0 state:D stack: 0 pid: 9 ppid: 2 flags:0x00000008 Workqueue: events_unbound deferred_probe_work_func Call trace: __switch_to+0xf4/0x1f4 __schedule+0x418/0xb80 schedule+0x5c/0x10c schedule_preempt_disabled+0x2c/0x48 __mutex_lock+0x238/0x488 __mutex_lock_slowpath+0x1c/0x28 mutex_lock+0x50/0x74 clk_prepare_lock+0x7c/0x9c clk_core_prepare_lock+0x20/0x44 clk_prepare+0x24/0x30 clk_bulk_prepare+0x40/0xb0 mdss_runtime_resume+0x54/0x1c8 pm_generic_runtime_resume+0x30/0x44 __genpd_runtime_resume+0x68/0x7c genpd_runtime_resume+0x108/0x1f4 __rpm_callback+0x84/0x144 rpm_callback+0x30/0x88 rpm_resume+0x1f4/0x52c rpm_resume+0x178/0x52c __pm_runtime_resume+0x58/0x98 __device_attach+0xe0/0x170 device_initial_probe+0x1c/0x28 bus_probe_device+0x3c/0x9c device_add+0x644/0x814 mipi_dsi_device_register_full+0xe4/0x170 devm_mipi_dsi_device_register_full+0x28/0x70 ti_sn_bridge_probe+0x1dc/0x2c0 auxiliary_bus_probe+0x4c/0x94 really_probe+0xcc/0x2c8 __driver_probe_device+0xa8/0x130 driver_probe_device+0x48/0x110 __device_attach_driver+0xa4/0xcc bus_for_each_drv+0x8c/0xd8 __device_attach+0xf8/0x170 device_initial_probe+0x1c/0x28 bus_probe_device+0x3c/0x9c deferred_probe_work_func+0x9c/0xd8 process_one_work+0x148/0x518 worker_thread+0x138/0x350 kthread+0x138/0x1e0 ret_from_fork+0x10/0x20 The first thread is walking the clk tree and calling clk_pm_runtime_get() to power on devices required to read the clk hardware via struct clk_ops::is_enabled(). This thread holds the clk prepare_lock, and is trying to runtime PM resume a device, when it finds that the device is in the process of resuming so the thread schedule()s away waiting for the device to finish resuming before continuing. The second thread is runtime PM resuming the same device, but the runtime resume callback is calling clk_prepare(), trying to grab the prepare_lock waiting on the first thread. This is a classic ABBA deadlock. To properly fix the deadlock, we must never runtime PM resume or suspend a device with the clk prepare_lock held. Actually doing that is near impossible today because the global prepare_lock would have to be dropped in the middle of the tree, the device runtime PM resumed/suspended, and then the prepare_lock grabbed again to ensure consistency of the clk tree topology. If anything changes with the clk tree in the meantime, we've lost and will need to start the operation all over again. Luckily, most of the time we're simply incrementing or decrementing the runtime PM count on an active device, so we don't have the chance to schedule away with the prepare_lock held. Let's fix this immediate problem that can be triggered more easily by simply booting on Qualcomm sc7180. Introduce a list of clk_core structures that have been registered, or are in the process of being registered, that require runtime PM to operate. Iterate this list and call clk_pm_runtime_get() on each of them without holding the prepare_lock during clk_disable_unused(). This way we can be certain that the runtime PM state of the devices will be active and resumed so we can't schedule away while walking the clk tree with the prepare_lock held. Similarly, call clk_pm_runtime_put() without the prepare_lock held to properly drop the runtime PM reference. We remove the calls to clk_pm_runtime_{get,put}() in this path because they're superfluous now that we know the devices are runtime resumed. Reported-by: Douglas Anderson Closes: https://lore.kernel.org/all/20220922084322.RFC.2.I375b6b9e0a0a5348962f004beb3dafee6a12dfbb@changeid/ [1] Closes: https://issuetracker.google.com/328070191 Cc: Marek Szyprowski Cc: Ulf Hansson Cc: Krzysztof Kozlowski Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-5-sboyd@kernel.org Reviewed-by: Douglas Anderson Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 117 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 12 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 4f9f55cff9231..75d8f7f0de9ba 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -37,6 +37,10 @@ static HLIST_HEAD(clk_root_list); static HLIST_HEAD(clk_orphan_list); static LIST_HEAD(clk_notifier_list); +/* List of registered clks that use runtime PM */ +static HLIST_HEAD(clk_rpm_list); +static DEFINE_MUTEX(clk_rpm_list_lock); + static const struct hlist_head *all_lists[] = { &clk_root_list, &clk_orphan_list, @@ -59,6 +63,7 @@ struct clk_core { struct clk_hw *hw; struct module *owner; struct device *dev; + struct hlist_node rpm_node; struct device_node *of_node; struct clk_core *parent; struct clk_parent_map *parents; @@ -122,6 +127,89 @@ static void clk_pm_runtime_put(struct clk_core *core) pm_runtime_put_sync(core->dev); } +/** + * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices + * + * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so + * that disabling unused clks avoids a deadlock where a device is runtime PM + * resuming/suspending and the runtime PM callback is trying to grab the + * prepare_lock for something like clk_prepare_enable() while + * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime + * PM resume/suspend the device as well. + * + * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on + * success. Otherwise the lock is released on failure. + * + * Return: 0 on success, negative errno otherwise. + */ +static int clk_pm_runtime_get_all(void) +{ + int ret; + struct clk_core *core, *failed; + + /* + * Grab the list lock to prevent any new clks from being registered + * or unregistered until clk_pm_runtime_put_all(). + */ + mutex_lock(&clk_rpm_list_lock); + + /* + * Runtime PM "get" all the devices that are needed for the clks + * currently registered. Do this without holding the prepare_lock, to + * avoid the deadlock. + */ + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + ret = clk_pm_runtime_get(core); + if (ret) { + failed = core; + pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n", + dev_name(failed->dev), failed->name); + goto err; + } + } + + return 0; + +err: + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + if (core == failed) + break; + + clk_pm_runtime_put(core); + } + mutex_unlock(&clk_rpm_list_lock); + + return ret; +} + +/** + * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices + * + * Put the runtime PM references taken in clk_pm_runtime_get_all() and release + * the 'clk_rpm_list_lock'. + */ +static void clk_pm_runtime_put_all(void) +{ + struct clk_core *core; + + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) + clk_pm_runtime_put(core); + mutex_unlock(&clk_rpm_list_lock); +} + +static void clk_pm_runtime_init(struct clk_core *core) +{ + struct device *dev = core->dev; + + if (dev && pm_runtime_enabled(dev)) { + core->rpm_enabled = true; + + mutex_lock(&clk_rpm_list_lock); + hlist_add_head(&core->rpm_node, &clk_rpm_list); + mutex_unlock(&clk_rpm_list_lock); + } +} + /*** locking ***/ static void clk_prepare_lock(void) { @@ -1310,9 +1398,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) if (core->flags & CLK_IGNORE_UNUSED) return; - if (clk_pm_runtime_get(core)) - return; - if (clk_core_is_prepared(core)) { trace_clk_unprepare(core); if (core->ops->unprepare_unused) @@ -1321,8 +1406,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); } - - clk_pm_runtime_put(core); } static void __init clk_disable_unused_subtree(struct clk_core *core) @@ -1338,9 +1421,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_prepare_enable(core->parent); - if (clk_pm_runtime_get(core)) - goto unprepare_out; - flags = clk_enable_lock(); if (core->enable_count) @@ -1365,8 +1445,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) unlock_out: clk_enable_unlock(flags); - clk_pm_runtime_put(core); -unprepare_out: if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_unprepare(core->parent); } @@ -1382,6 +1460,7 @@ __setup("clk_ignore_unused", clk_ignore_unused_setup); static int __init clk_disable_unused(void) { struct clk_core *core; + int ret; if (clk_ignore_unused) { pr_warn("clk: Not disabling unused clocks\n"); @@ -1390,6 +1469,13 @@ static int __init clk_disable_unused(void) pr_info("clk: Disabling unused clocks\n"); + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; + /* + * Grab the prepare lock to keep the clk topology stable while iterating + * over clks. + */ clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) @@ -1406,6 +1492,8 @@ static int __init clk_disable_unused(void) clk_prepare_unlock(); + clk_pm_runtime_put_all(); + return 0; } late_initcall_sync(clk_disable_unused); @@ -4071,6 +4159,12 @@ static void __clk_release(struct kref *ref) { struct clk_core *core = container_of(ref, struct clk_core, ref); + if (core->rpm_enabled) { + mutex_lock(&clk_rpm_list_lock); + hlist_del(&core->rpm_node); + mutex_unlock(&clk_rpm_list_lock); + } + clk_core_free_parent_map(core); kfree_const(core->name); kfree(core); @@ -4110,9 +4204,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) } core->ops = init->ops; - if (dev && pm_runtime_enabled(dev)) - core->rpm_enabled = true; core->dev = dev; + clk_pm_runtime_init(core); core->of_node = np; if (dev && dev->driver) core->owner = dev->driver->owner; -- GitLab From 5833b99cf8fda9475f3db16490e66aee9467df61 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Thu, 16 Mar 2023 15:58:26 +0800 Subject: [PATCH 1831/2290] clk: remove unnecessary (void*) conversions [ Upstream commit 5b1a1c1ab1f981b15bce778db863344f59bd1501 ] Pointer variables of void * type do not require type cast. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20230316075826.22754-1-yuzhe@nfschina.com Signed-off-by: Stephen Boyd Stable-dep-of: 9d1e795f754d ("clk: Get runtime PM before walking tree for clk_summary") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 75d8f7f0de9ba..bf4ac2f52d335 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3245,7 +3245,7 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, static int clk_summary_show(struct seq_file *s, void *data) { struct clk_core *c; - struct hlist_head **lists = (struct hlist_head **)s->private; + struct hlist_head **lists = s->private; seq_puts(s, " enable prepare protect duty hardware\n"); seq_puts(s, " clock count count count rate accuracy phase cycle enable\n"); @@ -3304,7 +3304,7 @@ static int clk_dump_show(struct seq_file *s, void *data) { struct clk_core *c; bool first_node = true; - struct hlist_head **lists = (struct hlist_head **)s->private; + struct hlist_head **lists = s->private; seq_putc(s, '{'); clk_prepare_lock(); -- GitLab From 5a704c267a210e1ea097e1d80bfc6620a706051d Mon Sep 17 00:00:00 2001 From: Vishal Badole Date: Sun, 27 Nov 2022 22:53:19 +0530 Subject: [PATCH 1832/2290] clk: Show active consumers of clocks in debugfs [ Upstream commit dcce5cc7826e9c6b3a2443e5e6b7f8d02a103c35 ] This feature lists the clock consumer's name and respective connection id. Using this feature user can easily check that which user has acquired and enabled a particular clock. Usage: >> cat /sys/kernel/debug/clk/clk_summary enable prepare protect duty hardware Connection clock count count count rate accuracy phase cycle enable consumer Id ------------------------------------------------------------------------------------------------------------------------------ clk_mcasp0_fixed 0 0 0 24576000 0 0 50000 Y deviceless of_clk_get_from_provider deviceless no_connection_id clk_mcasp0 0 0 0 24576000 0 0 50000 N simple-audio-card,cpu no_connection_id deviceless no_connection_id Co-developed-by: Chinmoy Ghosh Signed-off-by: Chinmoy Ghosh Co-developed-by: Mintu Patel Signed-off-by: Mintu Patel Co-developed-by: Vimal Kumar Signed-off-by: Vimal Kumar Signed-off-by: Vishal Badole Link: https://lore.kernel.org/r/1669569799-8526-1-git-send-email-badolevishal1116@gmail.com Signed-off-by: Stephen Boyd Stable-dep-of: 9d1e795f754d ("clk: Get runtime PM before walking tree for clk_summary") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index bf4ac2f52d335..ded4a51323d2e 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3205,28 +3205,41 @@ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c, int level) { int phase; + struct clk *clk_user; + int multi_node = 0; - seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu ", + seq_printf(s, "%*s%-*s %-7d %-8d %-8d %-11lu %-10lu ", level * 3 + 1, "", - 30 - level * 3, c->name, + 35 - level * 3, c->name, c->enable_count, c->prepare_count, c->protect_count, clk_core_get_rate_recalc(c), clk_core_get_accuracy_recalc(c)); phase = clk_core_get_phase(c); if (phase >= 0) - seq_printf(s, "%5d", phase); + seq_printf(s, "%-5d", phase); else seq_puts(s, "-----"); - seq_printf(s, " %6d", clk_core_get_scaled_duty_cycle(c, 100000)); + seq_printf(s, " %-6d", clk_core_get_scaled_duty_cycle(c, 100000)); if (c->ops->is_enabled) - seq_printf(s, " %9c\n", clk_core_is_enabled(c) ? 'Y' : 'N'); + seq_printf(s, " %5c ", clk_core_is_enabled(c) ? 'Y' : 'N'); else if (!c->ops->enable) - seq_printf(s, " %9c\n", 'Y'); + seq_printf(s, " %5c ", 'Y'); else - seq_printf(s, " %9c\n", '?'); + seq_printf(s, " %5c ", '?'); + + hlist_for_each_entry(clk_user, &c->clks, clks_node) { + seq_printf(s, "%*s%-*s %-25s\n", + level * 3 + 2 + 105 * multi_node, "", + 30, + clk_user->dev_id ? clk_user->dev_id : "deviceless", + clk_user->con_id ? clk_user->con_id : "no_connection_id"); + + multi_node = 1; + } + } static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, @@ -3247,9 +3260,10 @@ static int clk_summary_show(struct seq_file *s, void *data) struct clk_core *c; struct hlist_head **lists = s->private; - seq_puts(s, " enable prepare protect duty hardware\n"); - seq_puts(s, " clock count count count rate accuracy phase cycle enable\n"); - seq_puts(s, "-------------------------------------------------------------------------------------------------------\n"); + seq_puts(s, " enable prepare protect duty hardware connection\n"); + seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); + seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); + clk_prepare_lock(); -- GitLab From 83ada89e4a86e2b28ea2b5113c76d6dc7560a4d0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:59 -0700 Subject: [PATCH 1833/2290] clk: Get runtime PM before walking tree for clk_summary [ Upstream commit 9d1e795f754db1ac3344528b7af0b17b8146f321 ] Similar to the previous commit, we should make sure that all devices are runtime resumed before printing the clk_summary through debugfs. Failure to do so would result in a deadlock if the thread is resuming a device to print clk state and that device is also runtime resuming in another thread, e.g the screen is turning on and the display driver is starting up. We remove the calls to clk_pm_runtime_{get,put}() in this path because they're superfluous now that we know the devices are runtime resumed. This also squashes a bug where the return value of clk_pm_runtime_get() wasn't checked, leading to an RPM count underflow on error paths. Fixes: 1bb294a7981c ("clk: Enable/Disable runtime PM for clk_summary") Cc: Taniya Das Cc: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-6-sboyd@kernel.org Reviewed-by: Douglas Anderson Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ded4a51323d2e..fe1d45eac837c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3247,9 +3247,7 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, { struct clk_core *child; - clk_pm_runtime_get(c); clk_summary_show_one(s, c, level); - clk_pm_runtime_put(c); hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); @@ -3259,11 +3257,15 @@ static int clk_summary_show(struct seq_file *s, void *data) { struct clk_core *c; struct hlist_head **lists = s->private; + int ret; seq_puts(s, " enable prepare protect duty hardware connection\n"); seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; clk_prepare_lock(); @@ -3272,6 +3274,7 @@ static int clk_summary_show(struct seq_file *s, void *data) clk_summary_show_subtree(s, c, 0); clk_prepare_unlock(); + clk_pm_runtime_put_all(); return 0; } @@ -3319,8 +3322,14 @@ static int clk_dump_show(struct seq_file *s, void *data) struct clk_core *c; bool first_node = true; struct hlist_head **lists = s->private; + int ret; + + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; seq_putc(s, '{'); + clk_prepare_lock(); for (; *lists; lists++) { @@ -3333,6 +3342,7 @@ static int clk_dump_show(struct seq_file *s, void *data) } clk_prepare_unlock(); + clk_pm_runtime_put_all(); seq_puts(s, "}\n"); return 0; -- GitLab From 0904f9ef910a4a34d9380e36edc4c73348882464 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:31 +0100 Subject: [PATCH 1834/2290] clk: mediatek: mt8192: Correctly unregister and free clocks on failure [ Upstream commit 0cbe12694990501be92f997d987925132002dbe5 ] If anything fails during probe of the clock controller(s), unregister (and kfree!) whatever we have previously registered to leave with a clean state and prevent leaks. Fixes: 710573dee31b ("clk: mediatek: Add MT8192 basic clocks support") Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Reviewed-by: Markus Schneider-Pargmann Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-2-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt8192.c | 77 ++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 17 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index d0f2269310706..74bd8bac94a35 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1100,27 +1100,64 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data); - mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); - mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); - mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, &mt8192_clk_lock, - top_clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8192_clk_lock, - top_clk_data); - mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8192_clk_lock, - top_clk_data); - r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + r = mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data); if (r) return r; + r = mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); + if (r) + goto unregister_fixed_clks; + + r = mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); + if (r) + goto unregister_early_factors; + + r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + &mt8192_clk_lock, top_clk_data); + if (r) + goto unregister_factors; + + r = mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + &mt8192_clk_lock, top_clk_data); + if (r) + goto unregister_muxes; + + r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + &mt8192_clk_lock, top_clk_data); + if (r) + goto unregister_top_composites; + + r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + if (r) + goto unregister_adj_divs_composites; + r = clk_mt8192_reg_mfg_mux_notifier(&pdev->dev, top_clk_data->hws[CLK_TOP_MFG_PLL_SEL]->clk); if (r) - return r; - + goto unregister_gates; - return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, + r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, top_clk_data); + if (r) + goto unregister_gates; + + return 0; + +unregister_gates: + mtk_clk_unregister_gates(top_clks, ARRAY_SIZE(top_clks), top_clk_data); +unregister_adj_divs_composites: + mtk_clk_unregister_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), top_clk_data); +unregister_top_composites: + mtk_clk_unregister_composites(top_muxes, ARRAY_SIZE(top_muxes), top_clk_data); +unregister_muxes: + mtk_clk_unregister_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), top_clk_data); +unregister_factors: + mtk_clk_unregister_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); +unregister_early_factors: + mtk_clk_unregister_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); +unregister_fixed_clks: + mtk_clk_unregister_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data); + return r; } static int clk_mt8192_infra_probe(struct platform_device *pdev) @@ -1139,14 +1176,16 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) r = mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc); if (r) - goto free_clk_data; + goto unregister_gates; r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) - goto free_clk_data; + goto unregister_gates; return r; +unregister_gates: + mtk_clk_unregister_gates(infra_clks, ARRAY_SIZE(infra_clks), clk_data); free_clk_data: mtk_free_clk_data(clk_data); return r; @@ -1168,10 +1207,12 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) - goto free_clk_data; + goto unregister_gates; return r; +unregister_gates: + mtk_clk_unregister_gates(peri_clks, ARRAY_SIZE(peri_clks), clk_data); free_clk_data: mtk_free_clk_data(clk_data); return r; @@ -1194,10 +1235,12 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) - goto free_clk_data; + goto unregister_gates; return r; +unregister_gates: + mtk_clk_unregister_gates(apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); free_clk_data: mtk_free_clk_data(clk_data); return r; -- GitLab From 647a25b07d6d0e190b19b2ce3e674435f1d369b8 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:32 +0100 Subject: [PATCH 1835/2290] clk: mediatek: mt8192: Propagate struct device for gate clocks [ Upstream commit fdc325c8f79cb4155009db8394db19793c4d07cd ] Convert instances of mtk_clk_register_gates() to use the newer mtk_clk_register_gates_with_dev() to propagate struct device to the clk framework. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-3-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt8192.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index 74bd8bac94a35..508af9bbcc46c 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1127,7 +1127,8 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_top_composites; - r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + r = mtk_clk_register_gates_with_dev(node, top_clks, ARRAY_SIZE(top_clks), + top_clk_data, &pdev->dev); if (r) goto unregister_adj_divs_composites; @@ -1170,7 +1171,8 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), clk_data); + r = mtk_clk_register_gates_with_dev(node, infra_clks, ARRAY_SIZE(infra_clks), + clk_data, &pdev->dev); if (r) goto free_clk_data; @@ -1201,7 +1203,8 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); + r = mtk_clk_register_gates_with_dev(node, peri_clks, ARRAY_SIZE(peri_clks), + clk_data, &pdev->dev); if (r) goto free_clk_data; @@ -1229,7 +1232,9 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) return -ENOMEM; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - r = mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); + r = mtk_clk_register_gates_with_dev(node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data, + &pdev->dev); if (r) goto free_clk_data; -- GitLab From 082b831488a41257b7ac7ffa1d80a0b60d98394d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:33 +0100 Subject: [PATCH 1836/2290] clk: mediatek: clk-gate: Propagate struct device with mtk_clk_register_gates() [ Upstream commit 20498d52c9c1a68b1d92c42bce1dc893d3e74f30 ] Commit e4c23e19aa2a ("clk: mediatek: Register clock gate with device") introduces a helper function for the sole purpose of propagating a struct device pointer to the clk API when registering the mtk-gate clocks to take advantage of Runtime PM when/where needed and where a power domain is defined in devicetree. Function mtk_clk_register_gates() then becomes a wrapper around the new mtk_clk_register_gates_with_dev() function that will simply pass NULL as struct device: this is essential when registering drivers with CLK_OF_DECLARE instead of as a platform device, as there will be no struct device to pass... but we can as well simply have only one function that always takes such pointer as a param and pass NULL when unavoidable. This commit removes the mtk_clk_register_gates() wrapper and renames mtk_clk_register_gates_with_dev() to the former and all of the calls to either of the two functions were fixed in all drivers in order to reflect this change; also, to improve consistency with other kernel functions, the pointer to struct device was moved as the first param. Since a lot of MediaTek clock drivers are actually registering as a platform device, but were still registering the mtk-gate clocks without passing any struct device to the clock framework, they've been changed to pass a valid one now, as to make all those platforms able to use runtime power management where available. While at it, some much needed indentation changes were also done. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Reviewed-by: Markus Schneider-Pargmann Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-4-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-gate.c | 23 ++++++------------- drivers/clk/mediatek/clk-gate.h | 7 +----- drivers/clk/mediatek/clk-mt2701-aud.c | 4 ++-- drivers/clk/mediatek/clk-mt2701-eth.c | 4 ++-- drivers/clk/mediatek/clk-mt2701-g3d.c | 2 +- drivers/clk/mediatek/clk-mt2701-hif.c | 4 ++-- drivers/clk/mediatek/clk-mt2701-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt2701.c | 12 +++++----- drivers/clk/mediatek/clk-mt2712-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt2712.c | 12 +++++----- drivers/clk/mediatek/clk-mt6765.c | 10 ++++---- drivers/clk/mediatek/clk-mt6779-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt6779.c | 6 ++--- drivers/clk/mediatek/clk-mt6795-infracfg.c | 3 ++- drivers/clk/mediatek/clk-mt6795-mm.c | 3 ++- drivers/clk/mediatek/clk-mt6795-pericfg.c | 3 ++- drivers/clk/mediatek/clk-mt6797-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt6797.c | 4 ++-- drivers/clk/mediatek/clk-mt7622-aud.c | 4 ++-- drivers/clk/mediatek/clk-mt7622-eth.c | 8 +++---- drivers/clk/mediatek/clk-mt7622-hif.c | 8 +++---- drivers/clk/mediatek/clk-mt7622.c | 14 ++++++------ drivers/clk/mediatek/clk-mt7629-eth.c | 7 +++--- drivers/clk/mediatek/clk-mt7629-hif.c | 8 +++---- drivers/clk/mediatek/clk-mt7629.c | 10 ++++---- drivers/clk/mediatek/clk-mt7986-eth.c | 10 ++++---- drivers/clk/mediatek/clk-mt7986-infracfg.c | 4 ++-- drivers/clk/mediatek/clk-mt8135.c | 8 +++---- drivers/clk/mediatek/clk-mt8167-aud.c | 2 +- drivers/clk/mediatek/clk-mt8167-img.c | 2 +- drivers/clk/mediatek/clk-mt8167-mfgcfg.c | 2 +- drivers/clk/mediatek/clk-mt8167-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt8167-vdec.c | 3 ++- drivers/clk/mediatek/clk-mt8167.c | 2 +- drivers/clk/mediatek/clk-mt8173-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt8173.c | 24 ++++++++++---------- drivers/clk/mediatek/clk-mt8183-audio.c | 4 ++-- drivers/clk/mediatek/clk-mt8183-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt8183.c | 16 ++++++------- drivers/clk/mediatek/clk-mt8186-mm.c | 3 ++- drivers/clk/mediatek/clk-mt8192-aud.c | 3 ++- drivers/clk/mediatek/clk-mt8192-mm.c | 3 ++- drivers/clk/mediatek/clk-mt8192.c | 17 +++++++------- drivers/clk/mediatek/clk-mt8195-apmixedsys.c | 3 ++- drivers/clk/mediatek/clk-mt8195-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8195-vdo0.c | 3 ++- drivers/clk/mediatek/clk-mt8195-vdo1.c | 3 ++- drivers/clk/mediatek/clk-mt8365-mm.c | 5 ++-- drivers/clk/mediatek/clk-mt8365.c | 4 ++-- drivers/clk/mediatek/clk-mt8516-aud.c | 2 +- drivers/clk/mediatek/clk-mt8516.c | 2 +- drivers/clk/mediatek/clk-mtk.c | 4 ++-- 52 files changed, 156 insertions(+), 160 deletions(-) diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c index 0c867136e49d7..67d9e741c5e73 100644 --- a/drivers/clk/mediatek/clk-gate.c +++ b/drivers/clk/mediatek/clk-gate.c @@ -152,12 +152,12 @@ const struct clk_ops mtk_clk_gate_ops_no_setclr_inv = { }; EXPORT_SYMBOL_GPL(mtk_clk_gate_ops_no_setclr_inv); -static struct clk_hw *mtk_clk_register_gate(const char *name, +static struct clk_hw *mtk_clk_register_gate(struct device *dev, const char *name, const char *parent_name, struct regmap *regmap, int set_ofs, int clr_ofs, int sta_ofs, u8 bit, const struct clk_ops *ops, - unsigned long flags, struct device *dev) + unsigned long flags) { struct mtk_clk_gate *cg; int ret; @@ -202,10 +202,9 @@ static void mtk_clk_unregister_gate(struct clk_hw *hw) kfree(cg); } -int mtk_clk_register_gates_with_dev(struct device_node *node, - const struct mtk_gate *clks, int num, - struct clk_hw_onecell_data *clk_data, - struct device *dev) +int mtk_clk_register_gates(struct device *dev, struct device_node *node, + const struct mtk_gate *clks, int num, + struct clk_hw_onecell_data *clk_data) { int i; struct clk_hw *hw; @@ -229,13 +228,13 @@ int mtk_clk_register_gates_with_dev(struct device_node *node, continue; } - hw = mtk_clk_register_gate(gate->name, gate->parent_name, + hw = mtk_clk_register_gate(dev, gate->name, gate->parent_name, regmap, gate->regs->set_ofs, gate->regs->clr_ofs, gate->regs->sta_ofs, gate->shift, gate->ops, - gate->flags, dev); + gate->flags); if (IS_ERR(hw)) { pr_err("Failed to register clk %s: %pe\n", gate->name, @@ -261,14 +260,6 @@ err: return PTR_ERR(hw); } -EXPORT_SYMBOL_GPL(mtk_clk_register_gates_with_dev); - -int mtk_clk_register_gates(struct device_node *node, - const struct mtk_gate *clks, int num, - struct clk_hw_onecell_data *clk_data) -{ - return mtk_clk_register_gates_with_dev(node, clks, num, clk_data, NULL); -} EXPORT_SYMBOL_GPL(mtk_clk_register_gates); void mtk_clk_unregister_gates(const struct mtk_gate *clks, int num, diff --git a/drivers/clk/mediatek/clk-gate.h b/drivers/clk/mediatek/clk-gate.h index d9897ef535284..1a46b4c56fc5d 100644 --- a/drivers/clk/mediatek/clk-gate.h +++ b/drivers/clk/mediatek/clk-gate.h @@ -50,15 +50,10 @@ struct mtk_gate { #define GATE_MTK(_id, _name, _parent, _regs, _shift, _ops) \ GATE_MTK_FLAGS(_id, _name, _parent, _regs, _shift, _ops, 0) -int mtk_clk_register_gates(struct device_node *node, +int mtk_clk_register_gates(struct device *dev, struct device_node *node, const struct mtk_gate *clks, int num, struct clk_hw_onecell_data *clk_data); -int mtk_clk_register_gates_with_dev(struct device_node *node, - const struct mtk_gate *clks, int num, - struct clk_hw_onecell_data *clk_data, - struct device *dev); - void mtk_clk_unregister_gates(const struct mtk_gate *clks, int num, struct clk_hw_onecell_data *clk_data); diff --git a/drivers/clk/mediatek/clk-mt2701-aud.c b/drivers/clk/mediatek/clk-mt2701-aud.c index 4287bd3f545ee..03ab212aa7f4e 100644 --- a/drivers/clk/mediatek/clk-mt2701-aud.c +++ b/drivers/clk/mediatek/clk-mt2701-aud.c @@ -127,8 +127,8 @@ static int clk_mt2701_aud_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_AUD_NR); - mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, audio_clks, + ARRAY_SIZE(audio_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt2701-eth.c b/drivers/clk/mediatek/clk-mt2701-eth.c index 601358748750e..924725d67c13e 100644 --- a/drivers/clk/mediatek/clk-mt2701-eth.c +++ b/drivers/clk/mediatek/clk-mt2701-eth.c @@ -51,8 +51,8 @@ static int clk_mt2701_eth_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_ETHSYS_NR); - mtk_clk_register_gates(node, eth_clks, ARRAY_SIZE(eth_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, eth_clks, + ARRAY_SIZE(eth_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt2701-g3d.c b/drivers/clk/mediatek/clk-mt2701-g3d.c index 8d1fc8e3336eb..501fb99bb41a2 100644 --- a/drivers/clk/mediatek/clk-mt2701-g3d.c +++ b/drivers/clk/mediatek/clk-mt2701-g3d.c @@ -45,7 +45,7 @@ static int clk_mt2701_g3dsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_G3DSYS_NR); - mtk_clk_register_gates(node, g3d_clks, ARRAY_SIZE(g3d_clks), + mtk_clk_register_gates(&pdev->dev, node, g3d_clks, ARRAY_SIZE(g3d_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt2701-hif.c b/drivers/clk/mediatek/clk-mt2701-hif.c index edeeb033a2350..1ddefc21d6a0d 100644 --- a/drivers/clk/mediatek/clk-mt2701-hif.c +++ b/drivers/clk/mediatek/clk-mt2701-hif.c @@ -48,8 +48,8 @@ static int clk_mt2701_hif_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_HIFSYS_NR); - mtk_clk_register_gates(node, hif_clks, ARRAY_SIZE(hif_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, hif_clks, + ARRAY_SIZE(hif_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt2701-mm.c b/drivers/clk/mediatek/clk-mt2701-mm.c index eb069f3bc9a2b..f4885dffb324f 100644 --- a/drivers/clk/mediatek/clk-mt2701-mm.c +++ b/drivers/clk/mediatek/clk-mt2701-mm.c @@ -76,8 +76,8 @@ static int clk_mt2701_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt2701.c b/drivers/clk/mediatek/clk-mt2701.c index 00d2e81bdd43e..c7510f7ba4cc9 100644 --- a/drivers/clk/mediatek/clk-mt2701.c +++ b/drivers/clk/mediatek/clk-mt2701.c @@ -685,8 +685,8 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2701_clk_lock, clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } @@ -789,8 +789,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) } } - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - infra_clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), infra_clk_data); mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs), infra_clk_data); @@ -902,8 +902,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); mtk_clk_register_composites(peri_muxs, ARRAY_SIZE(peri_muxs), base, &mt2701_clk_lock, clk_data); diff --git a/drivers/clk/mediatek/clk-mt2712-mm.c b/drivers/clk/mediatek/clk-mt2712-mm.c index ad6daa8f28a83..e5264f1ce60d0 100644 --- a/drivers/clk/mediatek/clk-mt2712-mm.c +++ b/drivers/clk/mediatek/clk-mt2712-mm.c @@ -117,8 +117,8 @@ static int clk_mt2712_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt2712.c b/drivers/clk/mediatek/clk-mt2712.c index d6c2cc183b1a1..78ebb4f2335c1 100644 --- a/drivers/clk/mediatek/clk-mt2712.c +++ b/drivers/clk/mediatek/clk-mt2712.c @@ -1324,8 +1324,8 @@ static int clk_mt2712_top_probe(struct platform_device *pdev) &mt2712_clk_lock, top_clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2712_clk_lock, top_clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - top_clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, top_clk_data); @@ -1344,8 +1344,8 @@ static int clk_mt2712_infra_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -1366,8 +1366,8 @@ static int clk_mt2712_peri_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt6765.c b/drivers/clk/mediatek/clk-mt6765.c index 2c6a52ff5564e..4a7bc6e04580d 100644 --- a/drivers/clk/mediatek/clk-mt6765.c +++ b/drivers/clk/mediatek/clk-mt6765.c @@ -743,7 +743,7 @@ static int clk_mt6765_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -784,8 +784,8 @@ static int clk_mt6765_top_probe(struct platform_device *pdev) clk_data); mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, &mt6765_clk_lock, clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -820,8 +820,8 @@ static int clk_mt6765_ifr_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, ifr_clks, ARRAY_SIZE(ifr_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, ifr_clks, + ARRAY_SIZE(ifr_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt6779-mm.c b/drivers/clk/mediatek/clk-mt6779-mm.c index eda8cbee3d234..2cccf62d3b36f 100644 --- a/drivers/clk/mediatek/clk-mt6779-mm.c +++ b/drivers/clk/mediatek/clk-mt6779-mm.c @@ -93,8 +93,8 @@ static int clk_mt6779_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c index 39dadc9547088..5a396d2464ce5 100644 --- a/drivers/clk/mediatek/clk-mt6779.c +++ b/drivers/clk/mediatek/clk-mt6779.c @@ -1223,7 +1223,7 @@ static int clk_mt6779_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -1267,8 +1267,8 @@ static int clk_mt6779_infra_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt6795-infracfg.c b/drivers/clk/mediatek/clk-mt6795-infracfg.c index df7eed6e071e3..8025d171d6923 100644 --- a/drivers/clk/mediatek/clk-mt6795-infracfg.c +++ b/drivers/clk/mediatek/clk-mt6795-infracfg.c @@ -101,7 +101,8 @@ static int clk_mt6795_infracfg_probe(struct platform_device *pdev) if (ret) goto free_clk_data; - ret = mtk_clk_register_gates(node, infra_gates, ARRAY_SIZE(infra_gates), clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, infra_gates, + ARRAY_SIZE(infra_gates), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt6795-mm.c b/drivers/clk/mediatek/clk-mt6795-mm.c index fd73f202f2925..eebb6143ada22 100644 --- a/drivers/clk/mediatek/clk-mt6795-mm.c +++ b/drivers/clk/mediatek/clk-mt6795-mm.c @@ -87,7 +87,8 @@ static int clk_mt6795_mm_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - ret = mtk_clk_register_gates(node, mm_gates, ARRAY_SIZE(mm_gates), clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, mm_gates, + ARRAY_SIZE(mm_gates), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt6795-pericfg.c b/drivers/clk/mediatek/clk-mt6795-pericfg.c index cb28d35dad59b..f69e715e0c1f3 100644 --- a/drivers/clk/mediatek/clk-mt6795-pericfg.c +++ b/drivers/clk/mediatek/clk-mt6795-pericfg.c @@ -109,7 +109,8 @@ static int clk_mt6795_pericfg_probe(struct platform_device *pdev) if (ret) goto free_clk_data; - ret = mtk_clk_register_gates(node, peri_gates, ARRAY_SIZE(peri_gates), clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, peri_gates, + ARRAY_SIZE(peri_gates), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt6797-mm.c b/drivers/clk/mediatek/clk-mt6797-mm.c index 99a63f46642fa..d5e9fe445e308 100644 --- a/drivers/clk/mediatek/clk-mt6797-mm.c +++ b/drivers/clk/mediatek/clk-mt6797-mm.c @@ -89,8 +89,8 @@ static int clk_mt6797_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c index b362e99c8f53c..29211744b1736 100644 --- a/drivers/clk/mediatek/clk-mt6797.c +++ b/drivers/clk/mediatek/clk-mt6797.c @@ -584,8 +584,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) } } - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - infra_clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), infra_clk_data); mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs), infra_clk_data); diff --git a/drivers/clk/mediatek/clk-mt7622-aud.c b/drivers/clk/mediatek/clk-mt7622-aud.c index b17731fa11445..e9070d0bea8d6 100644 --- a/drivers/clk/mediatek/clk-mt7622-aud.c +++ b/drivers/clk/mediatek/clk-mt7622-aud.c @@ -114,8 +114,8 @@ static int clk_mt7622_audiosys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_AUDIO_NR_CLK); - mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, audio_clks, + ARRAY_SIZE(audio_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt7622-eth.c b/drivers/clk/mediatek/clk-mt7622-eth.c index a60190e834186..ece0f7a7c5f62 100644 --- a/drivers/clk/mediatek/clk-mt7622-eth.c +++ b/drivers/clk/mediatek/clk-mt7622-eth.c @@ -69,8 +69,8 @@ static int clk_mt7622_ethsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_ETH_NR_CLK); - mtk_clk_register_gates(node, eth_clks, ARRAY_SIZE(eth_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, eth_clks, + ARRAY_SIZE(eth_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -91,8 +91,8 @@ static int clk_mt7622_sgmiisys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_SGMII_NR_CLK); - mtk_clk_register_gates(node, sgmii_clks, ARRAY_SIZE(sgmii_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, sgmii_clks, + ARRAY_SIZE(sgmii_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7622-hif.c b/drivers/clk/mediatek/clk-mt7622-hif.c index 55baa6d06a205..c57ac2273c4e2 100644 --- a/drivers/clk/mediatek/clk-mt7622-hif.c +++ b/drivers/clk/mediatek/clk-mt7622-hif.c @@ -80,8 +80,8 @@ static int clk_mt7622_ssusbsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_SSUSB_NR_CLK); - mtk_clk_register_gates(node, ssusb_clks, ARRAY_SIZE(ssusb_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, ssusb_clks, + ARRAY_SIZE(ssusb_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -102,8 +102,8 @@ static int clk_mt7622_pciesys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PCIE_NR_CLK); - mtk_clk_register_gates(node, pcie_clks, ARRAY_SIZE(pcie_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, pcie_clks, + ARRAY_SIZE(pcie_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7622.c b/drivers/clk/mediatek/clk-mt7622.c index eebbb87906930..bba88018f056a 100644 --- a/drivers/clk/mediatek/clk-mt7622.c +++ b/drivers/clk/mediatek/clk-mt7622.c @@ -621,8 +621,8 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt7622_clk_lock, clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } @@ -635,8 +635,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); mtk_clk_register_cpumuxes(node, infra_muxes, ARRAY_SIZE(infra_muxes), clk_data); @@ -663,7 +663,7 @@ static int mtk_apmixedsys_init(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -682,8 +682,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, &mt7622_clk_lock, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7629-eth.c b/drivers/clk/mediatek/clk-mt7629-eth.c index e1d2635c72c10..eab838af6d413 100644 --- a/drivers/clk/mediatek/clk-mt7629-eth.c +++ b/drivers/clk/mediatek/clk-mt7629-eth.c @@ -82,7 +82,8 @@ static int clk_mt7629_ethsys_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, eth_clks, CLK_ETH_NR_CLK, clk_data); + mtk_clk_register_gates(&pdev->dev, node, eth_clks, + CLK_ETH_NR_CLK, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -106,8 +107,8 @@ static int clk_mt7629_sgmiisys_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, sgmii_clks[id++], CLK_SGMII_NR_CLK, - clk_data); + mtk_clk_register_gates(&pdev->dev, node, sgmii_clks[id++], + CLK_SGMII_NR_CLK, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7629-hif.c b/drivers/clk/mediatek/clk-mt7629-hif.c index 3628811a2f57f..804900792e490 100644 --- a/drivers/clk/mediatek/clk-mt7629-hif.c +++ b/drivers/clk/mediatek/clk-mt7629-hif.c @@ -75,8 +75,8 @@ static int clk_mt7629_ssusbsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_SSUSB_NR_CLK); - mtk_clk_register_gates(node, ssusb_clks, ARRAY_SIZE(ssusb_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, ssusb_clks, + ARRAY_SIZE(ssusb_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -97,8 +97,8 @@ static int clk_mt7629_pciesys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PCIE_NR_CLK); - mtk_clk_register_gates(node, pcie_clks, ARRAY_SIZE(pcie_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, pcie_clks, + ARRAY_SIZE(pcie_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7629.c b/drivers/clk/mediatek/clk-mt7629.c index 01ee45fcd7e34..c0cdaf0242961 100644 --- a/drivers/clk/mediatek/clk-mt7629.c +++ b/drivers/clk/mediatek/clk-mt7629.c @@ -585,8 +585,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); mtk_clk_register_cpumuxes(node, infra_muxes, ARRAY_SIZE(infra_muxes), clk_data); @@ -610,8 +610,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, &mt7629_clk_lock, clk_data); @@ -637,7 +637,7 @@ static int mtk_apmixedsys_init(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); clk_prepare_enable(clk_data->hws[CLK_APMIXED_ARMPLL]->clk); diff --git a/drivers/clk/mediatek/clk-mt7986-eth.c b/drivers/clk/mediatek/clk-mt7986-eth.c index c21e1d672384a..e04bc6845ea6d 100644 --- a/drivers/clk/mediatek/clk-mt7986-eth.c +++ b/drivers/clk/mediatek/clk-mt7986-eth.c @@ -72,8 +72,8 @@ static void __init mtk_sgmiisys_0_init(struct device_node *node) clk_data = mtk_alloc_clk_data(ARRAY_SIZE(sgmii0_clks)); - mtk_clk_register_gates(node, sgmii0_clks, ARRAY_SIZE(sgmii0_clks), - clk_data); + mtk_clk_register_gates(NULL, node, sgmii0_clks, + ARRAY_SIZE(sgmii0_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -90,8 +90,8 @@ static void __init mtk_sgmiisys_1_init(struct device_node *node) clk_data = mtk_alloc_clk_data(ARRAY_SIZE(sgmii1_clks)); - mtk_clk_register_gates(node, sgmii1_clks, ARRAY_SIZE(sgmii1_clks), - clk_data); + mtk_clk_register_gates(NULL, node, sgmii1_clks, + ARRAY_SIZE(sgmii1_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -109,7 +109,7 @@ static void __init mtk_ethsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(ARRAY_SIZE(eth_clks)); - mtk_clk_register_gates(node, eth_clks, ARRAY_SIZE(eth_clks), clk_data); + mtk_clk_register_gates(NULL, node, eth_clks, ARRAY_SIZE(eth_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7986-infracfg.c b/drivers/clk/mediatek/clk-mt7986-infracfg.c index 74e68a7197301..578f150e0ee52 100644 --- a/drivers/clk/mediatek/clk-mt7986-infracfg.c +++ b/drivers/clk/mediatek/clk-mt7986-infracfg.c @@ -180,8 +180,8 @@ static int clk_mt7986_infracfg_probe(struct platform_device *pdev) mtk_clk_register_factors(infra_divs, ARRAY_SIZE(infra_divs), clk_data); mtk_clk_register_muxes(infra_muxes, ARRAY_SIZE(infra_muxes), node, &mt7986_clk_lock, clk_data); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt8135.c b/drivers/clk/mediatek/clk-mt8135.c index 3ea06d2ec2f11..8137cf2252724 100644 --- a/drivers/clk/mediatek/clk-mt8135.c +++ b/drivers/clk/mediatek/clk-mt8135.c @@ -553,8 +553,8 @@ static void __init mtk_infrasys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(NULL, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -579,8 +579,8 @@ static void __init mtk_pericfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_gates, ARRAY_SIZE(peri_gates), - clk_data); + mtk_clk_register_gates(NULL, node, peri_gates, + ARRAY_SIZE(peri_gates), clk_data); mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, &mt8135_clk_lock, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167-aud.c b/drivers/clk/mediatek/clk-mt8167-aud.c index b5ac196cd9454..47a7d89d5777c 100644 --- a/drivers/clk/mediatek/clk-mt8167-aud.c +++ b/drivers/clk/mediatek/clk-mt8167-aud.c @@ -50,7 +50,7 @@ static void __init mtk_audsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_AUD_NR_CLK); - mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); + mtk_clk_register_gates(NULL, node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8167-img.c b/drivers/clk/mediatek/clk-mt8167-img.c index 4e7c0772b4f99..e196b3b894a16 100644 --- a/drivers/clk/mediatek/clk-mt8167-img.c +++ b/drivers/clk/mediatek/clk-mt8167-img.c @@ -42,7 +42,7 @@ static void __init mtk_imgsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IMG_NR_CLK); - mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks), clk_data); + mtk_clk_register_gates(NULL, node, img_clks, ARRAY_SIZE(img_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167-mfgcfg.c b/drivers/clk/mediatek/clk-mt8167-mfgcfg.c index 192714498b2ec..602d25f4cb2e2 100644 --- a/drivers/clk/mediatek/clk-mt8167-mfgcfg.c +++ b/drivers/clk/mediatek/clk-mt8167-mfgcfg.c @@ -40,7 +40,7 @@ static void __init mtk_mfgcfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_MFG_NR_CLK); - mtk_clk_register_gates(node, mfg_clks, ARRAY_SIZE(mfg_clks), clk_data); + mtk_clk_register_gates(NULL, node, mfg_clks, ARRAY_SIZE(mfg_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167-mm.c b/drivers/clk/mediatek/clk-mt8167-mm.c index a94961b7b8cc6..abc70e1221bf9 100644 --- a/drivers/clk/mediatek/clk-mt8167-mm.c +++ b/drivers/clk/mediatek/clk-mt8167-mm.c @@ -98,8 +98,8 @@ static int clk_mt8167_mm_probe(struct platform_device *pdev) data = &mt8167_mmsys_driver_data; - ret = mtk_clk_register_gates(node, data->gates_clk, data->gates_num, - clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, data->gates_clk, + data->gates_num, clk_data); if (ret) return ret; diff --git a/drivers/clk/mediatek/clk-mt8167-vdec.c b/drivers/clk/mediatek/clk-mt8167-vdec.c index 38f0ba357d599..92bc05d997985 100644 --- a/drivers/clk/mediatek/clk-mt8167-vdec.c +++ b/drivers/clk/mediatek/clk-mt8167-vdec.c @@ -49,7 +49,8 @@ static void __init mtk_vdecsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VDEC_NR_CLK); - mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks), clk_data); + mtk_clk_register_gates(NULL, node, vdec_clks, ARRAY_SIZE(vdec_clks), + clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167.c b/drivers/clk/mediatek/clk-mt8167.c index f900ac4bf7b8d..59fe82ba5c7a1 100644 --- a/drivers/clk/mediatek/clk-mt8167.c +++ b/drivers/clk/mediatek/clk-mt8167.c @@ -937,7 +937,7 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_fixed_clks(fixed_clks, ARRAY_SIZE(fixed_clks), clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), clk_data); + mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, diff --git a/drivers/clk/mediatek/clk-mt8173-mm.c b/drivers/clk/mediatek/clk-mt8173-mm.c index 5826eabdc9c77..444a3d58c8bf5 100644 --- a/drivers/clk/mediatek/clk-mt8173-mm.c +++ b/drivers/clk/mediatek/clk-mt8173-mm.c @@ -112,8 +112,8 @@ static int clk_mt8173_mm_probe(struct platform_device *pdev) data = &mt8173_mmsys_driver_data; - ret = mtk_clk_register_gates(node, data->gates_clk, data->gates_num, - clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, data->gates_clk, + data->gates_num, clk_data); if (ret) return ret; diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index b8529ee7199da..74ed7dd129f47 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -888,8 +888,8 @@ static void __init mtk_infrasys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(NULL, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); mtk_clk_register_factors(infra_divs, ARRAY_SIZE(infra_divs), clk_data); mtk_clk_register_cpumuxes(node, cpu_muxes, ARRAY_SIZE(cpu_muxes), @@ -918,8 +918,8 @@ static void __init mtk_pericfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_gates, ARRAY_SIZE(peri_gates), - clk_data); + mtk_clk_register_gates(NULL, node, peri_gates, + ARRAY_SIZE(peri_gates), clk_data); mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, &mt8173_clk_lock, clk_data); @@ -1062,8 +1062,8 @@ static void __init mtk_imgsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IMG_NR_CLK); - mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks), - clk_data); + mtk_clk_register_gates(NULL, node, img_clks, + ARRAY_SIZE(img_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -1080,8 +1080,8 @@ static void __init mtk_vdecsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VDEC_NR_CLK); - mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks), - clk_data); + mtk_clk_register_gates(NULL, node, vdec_clks, + ARRAY_SIZE(vdec_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -1097,8 +1097,8 @@ static void __init mtk_vencsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VENC_NR_CLK); - mtk_clk_register_gates(node, venc_clks, ARRAY_SIZE(venc_clks), - clk_data); + mtk_clk_register_gates(NULL, node, venc_clks, + ARRAY_SIZE(venc_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -1114,8 +1114,8 @@ static void __init mtk_vencltsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VENCLT_NR_CLK); - mtk_clk_register_gates(node, venclt_clks, ARRAY_SIZE(venclt_clks), - clk_data); + mtk_clk_register_gates(NULL, node, venclt_clks, + ARRAY_SIZE(venclt_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8183-audio.c b/drivers/clk/mediatek/clk-mt8183-audio.c index b2d7746eddbed..f358a6e7a3408 100644 --- a/drivers/clk/mediatek/clk-mt8183-audio.c +++ b/drivers/clk/mediatek/clk-mt8183-audio.c @@ -75,8 +75,8 @@ static int clk_mt8183_audio_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_AUDIO_NR_CLK); - mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, audio_clks, + ARRAY_SIZE(audio_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8183-mm.c b/drivers/clk/mediatek/clk-mt8183-mm.c index 11ecc6fb0065b..3580315309132 100644 --- a/drivers/clk/mediatek/clk-mt8183-mm.c +++ b/drivers/clk/mediatek/clk-mt8183-mm.c @@ -90,8 +90,8 @@ static int clk_mt8183_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index 1860a35a723a5..ba0d6ba10b359 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -1172,8 +1172,8 @@ static int clk_mt8183_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } @@ -1247,8 +1247,8 @@ static int clk_mt8183_top_probe(struct platform_device *pdev) mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), base, &mt8183_clk_lock, top_clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - top_clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); ret = clk_mt8183_reg_mfg_mux_notifier(&pdev->dev, top_clk_data->hws[CLK_TOP_MUX_MFG]->clk); @@ -1267,8 +1267,8 @@ static int clk_mt8183_infra_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { @@ -1290,8 +1290,8 @@ static int clk_mt8183_peri_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt8186-mm.c b/drivers/clk/mediatek/clk-mt8186-mm.c index 1d33be4079470..0b72607777fa1 100644 --- a/drivers/clk/mediatek/clk-mt8186-mm.c +++ b/drivers/clk/mediatek/clk-mt8186-mm.c @@ -69,7 +69,8 @@ static int clk_mt8186_mm_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); if (r) goto free_mm_data; diff --git a/drivers/clk/mediatek/clk-mt8192-aud.c b/drivers/clk/mediatek/clk-mt8192-aud.c index 8c989bffd8c72..f524188fe4c2d 100644 --- a/drivers/clk/mediatek/clk-mt8192-aud.c +++ b/drivers/clk/mediatek/clk-mt8192-aud.c @@ -87,7 +87,8 @@ static int clk_mt8192_aud_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, aud_clks, + ARRAY_SIZE(aud_clks), clk_data); if (r) return r; diff --git a/drivers/clk/mediatek/clk-mt8192-mm.c b/drivers/clk/mediatek/clk-mt8192-mm.c index 1be3ff4d407db..e9eb4cf8349ac 100644 --- a/drivers/clk/mediatek/clk-mt8192-mm.c +++ b/drivers/clk/mediatek/clk-mt8192-mm.c @@ -91,7 +91,8 @@ static int clk_mt8192_mm_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); if (r) return r; diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index 508af9bbcc46c..ac1eee513649b 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1127,8 +1127,8 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_top_composites; - r = mtk_clk_register_gates_with_dev(node, top_clks, ARRAY_SIZE(top_clks), - top_clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); if (r) goto unregister_adj_divs_composites; @@ -1171,8 +1171,8 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates_with_dev(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); if (r) goto free_clk_data; @@ -1203,8 +1203,8 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates_with_dev(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); if (r) goto free_clk_data; @@ -1232,9 +1232,8 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) return -ENOMEM; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - r = mtk_clk_register_gates_with_dev(node, apmixed_clks, - ARRAY_SIZE(apmixed_clks), clk_data, - &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data); if (r) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8195-apmixedsys.c b/drivers/clk/mediatek/clk-mt8195-apmixedsys.c index 0dfed6ec4d155..1bc917f2667e4 100644 --- a/drivers/clk/mediatek/clk-mt8195-apmixedsys.c +++ b/drivers/clk/mediatek/clk-mt8195-apmixedsys.c @@ -124,7 +124,8 @@ static int clk_mt8195_apmixed_probe(struct platform_device *pdev) if (r) goto free_apmixed_data; - r = mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data); if (r) goto unregister_plls; diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index 1e016329c1d23..e6e0298d64494 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1286,7 +1286,8 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) if (r) goto unregister_muxes; - r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); if (r) goto unregister_composite_divs; diff --git a/drivers/clk/mediatek/clk-mt8195-vdo0.c b/drivers/clk/mediatek/clk-mt8195-vdo0.c index 07b46bfd50406..839b730688acb 100644 --- a/drivers/clk/mediatek/clk-mt8195-vdo0.c +++ b/drivers/clk/mediatek/clk-mt8195-vdo0.c @@ -104,7 +104,8 @@ static int clk_mt8195_vdo0_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, vdo0_clks, ARRAY_SIZE(vdo0_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, vdo0_clks, + ARRAY_SIZE(vdo0_clks), clk_data); if (r) goto free_vdo0_data; diff --git a/drivers/clk/mediatek/clk-mt8195-vdo1.c b/drivers/clk/mediatek/clk-mt8195-vdo1.c index 835335b9d87bb..7df695b289258 100644 --- a/drivers/clk/mediatek/clk-mt8195-vdo1.c +++ b/drivers/clk/mediatek/clk-mt8195-vdo1.c @@ -131,7 +131,8 @@ static int clk_mt8195_vdo1_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, vdo1_clks, ARRAY_SIZE(vdo1_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, vdo1_clks, + ARRAY_SIZE(vdo1_clks), clk_data); if (r) goto free_vdo1_data; diff --git a/drivers/clk/mediatek/clk-mt8365-mm.c b/drivers/clk/mediatek/clk-mt8365-mm.c index 5c8bf18ab1f1d..22c75a03a6452 100644 --- a/drivers/clk/mediatek/clk-mt8365-mm.c +++ b/drivers/clk/mediatek/clk-mt8365-mm.c @@ -81,9 +81,8 @@ static int clk_mt8365_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - ret = mtk_clk_register_gates_with_dev(node, mm_clks, - ARRAY_SIZE(mm_clks), clk_data, - dev); + ret = mtk_clk_register_gates(dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); if (ret) goto err_free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index adfecb618f102..b30cbeae1c3d3 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -1019,8 +1019,8 @@ static int clk_mt8365_infra_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - ret = mtk_clk_register_gates(node, ifr_clks, ARRAY_SIZE(ifr_clks), - clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, ifr_clks, + ARRAY_SIZE(ifr_clks), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8516-aud.c b/drivers/clk/mediatek/clk-mt8516-aud.c index a3dafc719799c..a6ae8003b9ff6 100644 --- a/drivers/clk/mediatek/clk-mt8516-aud.c +++ b/drivers/clk/mediatek/clk-mt8516-aud.c @@ -48,7 +48,7 @@ static void __init mtk_audsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_AUD_NR_CLK); - mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); + mtk_clk_register_gates(NULL, node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8516.c b/drivers/clk/mediatek/clk-mt8516.c index 056953d594c66..bde0b8c761d47 100644 --- a/drivers/clk/mediatek/clk-mt8516.c +++ b/drivers/clk/mediatek/clk-mt8516.c @@ -655,7 +655,7 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_fixed_clks(fixed_clks, ARRAY_SIZE(fixed_clks), clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), clk_data); + mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index d31f01d0ba1c2..6123b234d3c3b 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -459,8 +459,8 @@ int mtk_clk_simple_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates_with_dev(node, mcd->clks, mcd->num_clks, - clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, mcd->clks, mcd->num_clks, + clk_data); if (r) goto free_data; -- GitLab From 6f5f72a684a2823f21efbfd20c7e4b528c44a781 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:35 +0100 Subject: [PATCH 1837/2290] clk: mediatek: clk-mtk: Propagate struct device for composites [ Upstream commit 01a6c1ab57c3a474c8d23c7d82c3fcce85f62612 ] Like done for cpumux clocks, propagate struct device for composite clocks registered through clk-mtk helpers to be able to get runtime pm support for MTK clocks. Signed-off-by: AngeloGioacchino Del Regno Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-6-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt2701.c | 10 ++++++---- drivers/clk/mediatek/clk-mt2712.c | 12 ++++++++---- drivers/clk/mediatek/clk-mt6779.c | 10 ++++++---- drivers/clk/mediatek/clk-mt6795-pericfg.c | 3 ++- drivers/clk/mediatek/clk-mt6795-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt6797.c | 3 ++- drivers/clk/mediatek/clk-mt7622.c | 8 +++++--- drivers/clk/mediatek/clk-mt7629.c | 8 +++++--- drivers/clk/mediatek/clk-mt8135.c | 10 ++++++---- drivers/clk/mediatek/clk-mt8167.c | 10 ++++++---- drivers/clk/mediatek/clk-mt8173.c | 10 ++++++---- drivers/clk/mediatek/clk-mt8183.c | 15 +++++++++------ drivers/clk/mediatek/clk-mt8186-mcu.c | 3 ++- drivers/clk/mediatek/clk-mt8186-topckgen.c | 6 ++++-- drivers/clk/mediatek/clk-mt8192.c | 6 ++++-- drivers/clk/mediatek/clk-mt8195-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8365.c | 7 ++++--- drivers/clk/mediatek/clk-mt8516.c | 10 ++++++---- drivers/clk/mediatek/clk-mtk.c | 11 ++++++----- drivers/clk/mediatek/clk-mtk.h | 3 ++- 20 files changed, 93 insertions(+), 58 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt2701.c b/drivers/clk/mediatek/clk-mt2701.c index c7510f7ba4cc9..e80fe9c942eeb 100644 --- a/drivers/clk/mediatek/clk-mt2701.c +++ b/drivers/clk/mediatek/clk-mt2701.c @@ -679,8 +679,9 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_fixed_divs, ARRAY_SIZE(top_fixed_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), - base, &mt2701_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt2701_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2701_clk_lock, clk_data); @@ -905,8 +906,9 @@ static int mtk_pericfg_init(struct platform_device *pdev) mtk_clk_register_gates(&pdev->dev, node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); - mtk_clk_register_composites(peri_muxs, ARRAY_SIZE(peri_muxs), base, - &mt2701_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, peri_muxs, + ARRAY_SIZE(peri_muxs), base, + &mt2701_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt2712.c b/drivers/clk/mediatek/clk-mt2712.c index 78ebb4f2335c1..a0f0c9ed48d10 100644 --- a/drivers/clk/mediatek/clk-mt2712.c +++ b/drivers/clk/mediatek/clk-mt2712.c @@ -1320,8 +1320,9 @@ static int clk_mt2712_top_probe(struct platform_device *pdev) mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt2712_clk_lock, top_clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt2712_clk_lock, top_clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2712_clk_lock, top_clk_data); mtk_clk_register_gates(&pdev->dev, node, top_clks, @@ -1395,8 +1396,11 @@ static int clk_mt2712_mcu_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MCU_NR_CLK); - mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), base, - &mt2712_clk_lock, clk_data); + r = mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, + &mt2712_clk_lock, clk_data); + if (r) + dev_err(&pdev->dev, "Could not register composites: %d\n", r); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c index 5a396d2464ce5..2c20e40d7c809 100644 --- a/drivers/clk/mediatek/clk-mt6779.c +++ b/drivers/clk/mediatek/clk-mt6779.c @@ -1251,11 +1251,13 @@ static int clk_mt6779_top_probe(struct platform_device *pdev) mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, &mt6779_clk_lock, clk_data); - mtk_clk_register_composites(top_aud_muxes, ARRAY_SIZE(top_aud_muxes), - base, &mt6779_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_muxes, + ARRAY_SIZE(top_aud_muxes), base, + &mt6779_clk_lock, clk_data); - mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), - base, &mt6779_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_divs, + ARRAY_SIZE(top_aud_divs), base, + &mt6779_clk_lock, clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt6795-pericfg.c b/drivers/clk/mediatek/clk-mt6795-pericfg.c index f69e715e0c1f3..08aaa9b09c363 100644 --- a/drivers/clk/mediatek/clk-mt6795-pericfg.c +++ b/drivers/clk/mediatek/clk-mt6795-pericfg.c @@ -114,7 +114,8 @@ static int clk_mt6795_pericfg_probe(struct platform_device *pdev) if (ret) goto free_clk_data; - ret = mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, + ret = mtk_clk_register_composites(&pdev->dev, peri_clks, + ARRAY_SIZE(peri_clks), base, &mt6795_peri_clk_lock, clk_data); if (ret) goto unregister_gates; diff --git a/drivers/clk/mediatek/clk-mt6795-topckgen.c b/drivers/clk/mediatek/clk-mt6795-topckgen.c index 2948dd1aee8fa..845cc87049303 100644 --- a/drivers/clk/mediatek/clk-mt6795-topckgen.c +++ b/drivers/clk/mediatek/clk-mt6795-topckgen.c @@ -557,7 +557,8 @@ static int clk_mt6795_topckgen_probe(struct platform_device *pdev) if (ret) goto unregister_factors; - ret = mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), base, + ret = mtk_clk_register_composites(&pdev->dev, top_aud_divs, + ARRAY_SIZE(top_aud_divs), base, &mt6795_top_clk_lock, clk_data); if (ret) goto unregister_muxes; diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c index 29211744b1736..0429a80f3cad7 100644 --- a/drivers/clk/mediatek/clk-mt6797.c +++ b/drivers/clk/mediatek/clk-mt6797.c @@ -398,7 +398,8 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_fixed_divs, ARRAY_SIZE(top_fixed_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, &mt6797_clk_lock, clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7622.c b/drivers/clk/mediatek/clk-mt7622.c index bba88018f056a..67a296646722f 100644 --- a/drivers/clk/mediatek/clk-mt7622.c +++ b/drivers/clk/mediatek/clk-mt7622.c @@ -615,8 +615,9 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), - base, &mt7622_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt7622_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt7622_clk_lock, clk_data); @@ -685,7 +686,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) mtk_clk_register_gates(&pdev->dev, node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); - mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, + mtk_clk_register_composites(&pdev->dev, peri_muxes, + ARRAY_SIZE(peri_muxes), base, &mt7622_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7629.c b/drivers/clk/mediatek/clk-mt7629.c index c0cdaf0242961..2019e272d1cd7 100644 --- a/drivers/clk/mediatek/clk-mt7629.c +++ b/drivers/clk/mediatek/clk-mt7629.c @@ -566,8 +566,9 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), - base, &mt7629_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt7629_clk_lock, clk_data); clk_prepare_enable(clk_data->hws[CLK_TOP_AXI_SEL]->clk); clk_prepare_enable(clk_data->hws[CLK_TOP_MEM_SEL]->clk); @@ -613,7 +614,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) mtk_clk_register_gates(&pdev->dev, node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); - mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, + mtk_clk_register_composites(&pdev->dev, peri_muxes, + ARRAY_SIZE(peri_muxes), base, &mt7629_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8135.c b/drivers/clk/mediatek/clk-mt8135.c index 8137cf2252724..a39ad58e27418 100644 --- a/drivers/clk/mediatek/clk-mt8135.c +++ b/drivers/clk/mediatek/clk-mt8135.c @@ -536,8 +536,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8135_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8135_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -581,8 +582,9 @@ static void __init mtk_pericfg_init(struct device_node *node) mtk_clk_register_gates(NULL, node, peri_gates, ARRAY_SIZE(peri_gates), clk_data); - mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, - &mt8135_clk_lock, clk_data); + mtk_clk_register_composites(NULL, peri_clks, + ARRAY_SIZE(peri_clks), base, + &mt8135_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8167.c b/drivers/clk/mediatek/clk-mt8167.c index 59fe82ba5c7a1..91669ebafaf9b 100644 --- a/drivers/clk/mediatek/clk-mt8167.c +++ b/drivers/clk/mediatek/clk-mt8167.c @@ -940,8 +940,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8167_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8167_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8167_clk_lock, clk_data); @@ -966,8 +967,9 @@ static void __init mtk_infracfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IFR_NR_CLK); - mtk_clk_register_composites(ifr_muxes, ARRAY_SIZE(ifr_muxes), base, - &mt8167_clk_lock, clk_data); + mtk_clk_register_composites(NULL, ifr_muxes, + ARRAY_SIZE(ifr_muxes), base, + &mt8167_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index 74ed7dd129f47..d05c1109b4f87 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -869,8 +869,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_fixed_clks(fixed_clks, ARRAY_SIZE(fixed_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8173_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8173_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -920,8 +921,9 @@ static void __init mtk_pericfg_init(struct device_node *node) mtk_clk_register_gates(NULL, node, peri_gates, ARRAY_SIZE(peri_gates), clk_data); - mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, - &mt8173_clk_lock, clk_data); + mtk_clk_register_composites(NULL, peri_clks, + ARRAY_SIZE(peri_clks), base, + &mt8173_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index ba0d6ba10b359..bf7b342332536 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -1241,11 +1241,13 @@ static int clk_mt8183_top_probe(struct platform_device *pdev) mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, &mt8183_clk_lock, top_clk_data); - mtk_clk_register_composites(top_aud_muxes, ARRAY_SIZE(top_aud_muxes), - base, &mt8183_clk_lock, top_clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_muxes, + ARRAY_SIZE(top_aud_muxes), base, + &mt8183_clk_lock, top_clk_data); - mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), - base, &mt8183_clk_lock, top_clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_divs, + ARRAY_SIZE(top_aud_divs), base, + &mt8183_clk_lock, top_clk_data); mtk_clk_register_gates(&pdev->dev, node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); @@ -1308,8 +1310,9 @@ static int clk_mt8183_mcu_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MCU_NR_CLK); - mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), base, - &mt8183_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, + &mt8183_clk_lock, clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt8186-mcu.c b/drivers/clk/mediatek/clk-mt8186-mcu.c index dfc305c1fc5d8..e52a2d986c99c 100644 --- a/drivers/clk/mediatek/clk-mt8186-mcu.c +++ b/drivers/clk/mediatek/clk-mt8186-mcu.c @@ -65,7 +65,8 @@ static int clk_mt8186_mcu_probe(struct platform_device *pdev) goto free_mcu_data; } - r = mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), base, + r = mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, NULL, clk_data); if (r) goto free_mcu_data; diff --git a/drivers/clk/mediatek/clk-mt8186-topckgen.c b/drivers/clk/mediatek/clk-mt8186-topckgen.c index d7f2c4663c853..4ac157320a6b9 100644 --- a/drivers/clk/mediatek/clk-mt8186-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8186-topckgen.c @@ -720,12 +720,14 @@ static int clk_mt8186_topck_probe(struct platform_device *pdev) if (r) goto unregister_factors; - r = mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + r = mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, &mt8186_clk_lock, clk_data); if (r) goto unregister_muxes; - r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + r = mtk_clk_register_composites(&pdev->dev, top_adj_divs, + ARRAY_SIZE(top_adj_divs), base, &mt8186_clk_lock, clk_data); if (r) goto unregister_composite_muxes; diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index ac1eee513649b..ab856d0276184 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1117,12 +1117,14 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_factors; - r = mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + r = mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, &mt8192_clk_lock, top_clk_data); if (r) goto unregister_muxes; - r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + r = mtk_clk_register_composites(&pdev->dev, top_adj_divs, + ARRAY_SIZE(top_adj_divs), base, &mt8192_clk_lock, top_clk_data); if (r) goto unregister_top_composites; diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index e6e0298d64494..aae31ef3903de 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1281,7 +1281,8 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) if (r) goto unregister_muxes; - r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + r = mtk_clk_register_composites(&pdev->dev, top_adj_divs, + ARRAY_SIZE(top_adj_divs), base, &mt8195_clk_lock, top_clk_data); if (r) goto unregister_muxes; diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index b30cbeae1c3d3..0482a8aa43cc9 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -952,7 +952,7 @@ static int clk_mt8365_top_probe(struct platform_device *pdev) if (ret) goto unregister_factors; - ret = mtk_clk_register_composites(top_misc_mux_gates, + ret = mtk_clk_register_composites(&pdev->dev, top_misc_mux_gates, ARRAY_SIZE(top_misc_mux_gates), base, &mt8365_clk_lock, clk_data); if (ret) @@ -1080,8 +1080,9 @@ static int clk_mt8365_mcu_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - ret = mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), - base, &mt8365_clk_lock, clk_data); + ret = mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, + &mt8365_clk_lock, clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8516.c b/drivers/clk/mediatek/clk-mt8516.c index bde0b8c761d47..6983d3a48dc9a 100644 --- a/drivers/clk/mediatek/clk-mt8516.c +++ b/drivers/clk/mediatek/clk-mt8516.c @@ -658,8 +658,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8516_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8516_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8516_clk_lock, clk_data); @@ -684,8 +685,9 @@ static void __init mtk_infracfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IFR_NR_CLK); - mtk_clk_register_composites(ifr_muxes, ARRAY_SIZE(ifr_muxes), base, - &mt8516_clk_lock, clk_data); + mtk_clk_register_composites(NULL, ifr_muxes, + ARRAY_SIZE(ifr_muxes), base, + &mt8516_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index 6123b234d3c3b..152f3d906ef8a 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -197,8 +197,8 @@ void mtk_clk_unregister_factors(const struct mtk_fixed_factor *clks, int num, } EXPORT_SYMBOL_GPL(mtk_clk_unregister_factors); -static struct clk_hw *mtk_clk_register_composite(const struct mtk_composite *mc, - void __iomem *base, spinlock_t *lock) +static struct clk_hw *mtk_clk_register_composite(struct device *dev, + const struct mtk_composite *mc, void __iomem *base, spinlock_t *lock) { struct clk_hw *hw; struct clk_mux *mux = NULL; @@ -264,7 +264,7 @@ static struct clk_hw *mtk_clk_register_composite(const struct mtk_composite *mc, div_ops = &clk_divider_ops; } - hw = clk_hw_register_composite(NULL, mc->name, parent_names, num_parents, + hw = clk_hw_register_composite(dev, mc->name, parent_names, num_parents, mux_hw, mux_ops, div_hw, div_ops, gate_hw, gate_ops, @@ -308,7 +308,8 @@ static void mtk_clk_unregister_composite(struct clk_hw *hw) kfree(mux); } -int mtk_clk_register_composites(const struct mtk_composite *mcs, int num, +int mtk_clk_register_composites(struct device *dev, + const struct mtk_composite *mcs, int num, void __iomem *base, spinlock_t *lock, struct clk_hw_onecell_data *clk_data) { @@ -327,7 +328,7 @@ int mtk_clk_register_composites(const struct mtk_composite *mcs, int num, continue; } - hw = mtk_clk_register_composite(mc, base, lock); + hw = mtk_clk_register_composite(dev, mc, base, lock); if (IS_ERR(hw)) { pr_err("Failed to register clk %s: %pe\n", mc->name, diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 63ae7941aa92f..3993a60738c77 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -149,7 +149,8 @@ struct mtk_composite { .flags = 0, \ } -int mtk_clk_register_composites(const struct mtk_composite *mcs, int num, +int mtk_clk_register_composites(struct device *dev, + const struct mtk_composite *mcs, int num, void __iomem *base, spinlock_t *lock, struct clk_hw_onecell_data *clk_data); void mtk_clk_unregister_composites(const struct mtk_composite *mcs, int num, -- GitLab From a4fe8813a7868ba5867e42e60de7a2b8baac30ff Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:36 +0100 Subject: [PATCH 1838/2290] clk: mediatek: clk-mux: Propagate struct device for mtk-mux [ Upstream commit d3d6bd5e25cdc460df33ae1db4f051c4bdd3aa60 ] Like done for other clocks, propagate struct device for mtk mux clocks registered through clk-mux helpers to enable runtime pm support. Signed-off-by: AngeloGioacchino Del Regno Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-7-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt6765.c | 3 ++- drivers/clk/mediatek/clk-mt6779.c | 5 +++-- drivers/clk/mediatek/clk-mt6795-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt7986-infracfg.c | 3 ++- drivers/clk/mediatek/clk-mt7986-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8183.c | 5 +++-- drivers/clk/mediatek/clk-mt8186-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8192.c | 3 ++- drivers/clk/mediatek/clk-mt8195-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8365.c | 3 ++- drivers/clk/mediatek/clk-mux.c | 14 ++++++++------ drivers/clk/mediatek/clk-mux.h | 3 ++- 12 files changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt6765.c b/drivers/clk/mediatek/clk-mt6765.c index 4a7bc6e04580d..c4941523f5520 100644 --- a/drivers/clk/mediatek/clk-mt6765.c +++ b/drivers/clk/mediatek/clk-mt6765.c @@ -782,7 +782,8 @@ static int clk_mt6765_top_probe(struct platform_device *pdev) clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt6765_clk_lock, clk_data); mtk_clk_register_gates(&pdev->dev, node, top_clks, ARRAY_SIZE(top_clks), clk_data); diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c index 2c20e40d7c809..7fe9d12b2dfdd 100644 --- a/drivers/clk/mediatek/clk-mt6779.c +++ b/drivers/clk/mediatek/clk-mt6779.c @@ -1248,8 +1248,9 @@ static int clk_mt6779_top_probe(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), - node, &mt6779_clk_lock, clk_data); + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, + &mt6779_clk_lock, clk_data); mtk_clk_register_composites(&pdev->dev, top_aud_muxes, ARRAY_SIZE(top_aud_muxes), base, diff --git a/drivers/clk/mediatek/clk-mt6795-topckgen.c b/drivers/clk/mediatek/clk-mt6795-topckgen.c index 845cc87049303..2ab8bf5d6d6d9 100644 --- a/drivers/clk/mediatek/clk-mt6795-topckgen.c +++ b/drivers/clk/mediatek/clk-mt6795-topckgen.c @@ -552,7 +552,8 @@ static int clk_mt6795_topckgen_probe(struct platform_device *pdev) if (ret) goto unregister_fixed_clks; - ret = mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + ret = mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt6795_top_clk_lock, clk_data); if (ret) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt7986-infracfg.c b/drivers/clk/mediatek/clk-mt7986-infracfg.c index 578f150e0ee52..0a4bf87ee1607 100644 --- a/drivers/clk/mediatek/clk-mt7986-infracfg.c +++ b/drivers/clk/mediatek/clk-mt7986-infracfg.c @@ -178,7 +178,8 @@ static int clk_mt7986_infracfg_probe(struct platform_device *pdev) return -ENOMEM; mtk_clk_register_factors(infra_divs, ARRAY_SIZE(infra_divs), clk_data); - mtk_clk_register_muxes(infra_muxes, ARRAY_SIZE(infra_muxes), node, + mtk_clk_register_muxes(&pdev->dev, infra_muxes, + ARRAY_SIZE(infra_muxes), node, &mt7986_clk_lock, clk_data); mtk_clk_register_gates(&pdev->dev, node, infra_clks, ARRAY_SIZE(infra_clks), clk_data); diff --git a/drivers/clk/mediatek/clk-mt7986-topckgen.c b/drivers/clk/mediatek/clk-mt7986-topckgen.c index de5121cf28774..c9bf47e6098fd 100644 --- a/drivers/clk/mediatek/clk-mt7986-topckgen.c +++ b/drivers/clk/mediatek/clk-mt7986-topckgen.c @@ -303,7 +303,8 @@ static int clk_mt7986_topckgen_probe(struct platform_device *pdev) mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt7986_clk_lock, clk_data); clk_prepare_enable(clk_data->hws[CLK_TOP_SYSAXI_SEL]->clk); diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index bf7b342332536..78620244144e8 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -1238,8 +1238,9 @@ static int clk_mt8183_top_probe(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), - node, &mt8183_clk_lock, top_clk_data); + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, + &mt8183_clk_lock, top_clk_data); mtk_clk_register_composites(&pdev->dev, top_aud_muxes, ARRAY_SIZE(top_aud_muxes), base, diff --git a/drivers/clk/mediatek/clk-mt8186-topckgen.c b/drivers/clk/mediatek/clk-mt8186-topckgen.c index 4ac157320a6b9..70b6e008a188b 100644 --- a/drivers/clk/mediatek/clk-mt8186-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8186-topckgen.c @@ -715,7 +715,8 @@ static int clk_mt8186_topck_probe(struct platform_device *pdev) if (r) goto unregister_fixed_clks; - r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + r = mtk_clk_register_muxes(&pdev->dev, top_mtk_muxes, + ARRAY_SIZE(top_mtk_muxes), node, &mt8186_clk_lock, clk_data); if (r) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index ab856d0276184..16feb86dcb1b8 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1112,7 +1112,8 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_early_factors; - r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + r = mtk_clk_register_muxes(&pdev->dev, top_mtk_muxes, + ARRAY_SIZE(top_mtk_muxes), node, &mt8192_clk_lock, top_clk_data); if (r) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index aae31ef3903de..3485ebb17ab83 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1262,7 +1262,8 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) if (r) goto unregister_fixed_clks; - r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + r = mtk_clk_register_muxes(&pdev->dev, top_mtk_muxes, + ARRAY_SIZE(top_mtk_muxes), node, &mt8195_clk_lock, top_clk_data); if (r) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index 0482a8aa43cc9..c9faa07ec0a64 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -947,7 +947,8 @@ static int clk_mt8365_top_probe(struct platform_device *pdev) if (ret) goto unregister_fixed_clks; - ret = mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + ret = mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt8365_clk_lock, clk_data); if (ret) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mux.c b/drivers/clk/mediatek/clk-mux.c index ba1720b9e2310..c8593554239d6 100644 --- a/drivers/clk/mediatek/clk-mux.c +++ b/drivers/clk/mediatek/clk-mux.c @@ -154,9 +154,10 @@ const struct clk_ops mtk_mux_gate_clr_set_upd_ops = { }; EXPORT_SYMBOL_GPL(mtk_mux_gate_clr_set_upd_ops); -static struct clk_hw *mtk_clk_register_mux(const struct mtk_mux *mux, - struct regmap *regmap, - spinlock_t *lock) +static struct clk_hw *mtk_clk_register_mux(struct device *dev, + const struct mtk_mux *mux, + struct regmap *regmap, + spinlock_t *lock) { struct mtk_clk_mux *clk_mux; struct clk_init_data init = {}; @@ -177,7 +178,7 @@ static struct clk_hw *mtk_clk_register_mux(const struct mtk_mux *mux, clk_mux->lock = lock; clk_mux->hw.init = &init; - ret = clk_hw_register(NULL, &clk_mux->hw); + ret = clk_hw_register(dev, &clk_mux->hw); if (ret) { kfree(clk_mux); return ERR_PTR(ret); @@ -198,7 +199,8 @@ static void mtk_clk_unregister_mux(struct clk_hw *hw) kfree(mux); } -int mtk_clk_register_muxes(const struct mtk_mux *muxes, +int mtk_clk_register_muxes(struct device *dev, + const struct mtk_mux *muxes, int num, struct device_node *node, spinlock_t *lock, struct clk_hw_onecell_data *clk_data) @@ -222,7 +224,7 @@ int mtk_clk_register_muxes(const struct mtk_mux *muxes, continue; } - hw = mtk_clk_register_mux(mux, regmap, lock); + hw = mtk_clk_register_mux(dev, mux, regmap, lock); if (IS_ERR(hw)) { pr_err("Failed to register clk %s: %pe\n", mux->name, diff --git a/drivers/clk/mediatek/clk-mux.h b/drivers/clk/mediatek/clk-mux.h index 83ff420f4ebe6..7ecb963b0ec68 100644 --- a/drivers/clk/mediatek/clk-mux.h +++ b/drivers/clk/mediatek/clk-mux.h @@ -83,7 +83,8 @@ extern const struct clk_ops mtk_mux_gate_clr_set_upd_ops; 0, _upd_ofs, _upd, CLK_SET_RATE_PARENT, \ mtk_mux_clr_set_upd_ops) -int mtk_clk_register_muxes(const struct mtk_mux *muxes, +int mtk_clk_register_muxes(struct device *dev, + const struct mtk_mux *muxes, int num, struct device_node *node, spinlock_t *lock, struct clk_hw_onecell_data *clk_data); -- GitLab From c1d87d56af063c87961511ee25f6b07a5676d27d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:42 +0100 Subject: [PATCH 1839/2290] clk: mediatek: clk-mtk: Extend mtk_clk_simple_probe() [ Upstream commit 7b6183108c8ccf0dc295f39cdf78bd8078455636 ] As a preparation to increase probe functions commonization across various MediaTek SoC clock controller drivers, extend function mtk_clk_simple_probe() to be able to register not only gates, but also fixed clocks, factors, muxes and composites. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Miles Chen Reviewed-by: Chen-Yu Tsai Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-13-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mtk.c | 101 ++++++++++++++++++++++++++++++--- drivers/clk/mediatek/clk-mtk.h | 10 ++++ 2 files changed, 103 insertions(+), 8 deletions(-) diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index 152f3d906ef8a..bfabd94a474a5 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -11,12 +11,14 @@ #include #include #include +#include #include #include #include #include "clk-mtk.h" #include "clk-gate.h" +#include "clk-mux.h" static void mtk_init_clk_data(struct clk_hw_onecell_data *clk_data, unsigned int clk_num) @@ -450,20 +452,71 @@ int mtk_clk_simple_probe(struct platform_device *pdev) const struct mtk_clk_desc *mcd; struct clk_hw_onecell_data *clk_data; struct device_node *node = pdev->dev.of_node; - int r; + void __iomem *base; + int num_clks, r; mcd = of_device_get_match_data(&pdev->dev); if (!mcd) return -EINVAL; - clk_data = mtk_alloc_clk_data(mcd->num_clks); + /* Composite clocks needs us to pass iomem pointer */ + if (mcd->composite_clks) { + if (!mcd->shared_io) + base = devm_platform_ioremap_resource(pdev, 0); + else + base = of_iomap(node, 0); + + if (IS_ERR_OR_NULL(base)) + return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM; + } + + /* Calculate how many clk_hw_onecell_data entries to allocate */ + num_clks = mcd->num_clks + mcd->num_composite_clks; + num_clks += mcd->num_fixed_clks + mcd->num_factor_clks; + num_clks += mcd->num_mux_clks; + + clk_data = mtk_alloc_clk_data(num_clks); if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(&pdev->dev, node, mcd->clks, mcd->num_clks, - clk_data); - if (r) - goto free_data; + if (mcd->fixed_clks) { + r = mtk_clk_register_fixed_clks(mcd->fixed_clks, + mcd->num_fixed_clks, clk_data); + if (r) + goto free_data; + } + + if (mcd->factor_clks) { + r = mtk_clk_register_factors(mcd->factor_clks, + mcd->num_factor_clks, clk_data); + if (r) + goto unregister_fixed_clks; + } + + if (mcd->mux_clks) { + r = mtk_clk_register_muxes(&pdev->dev, mcd->mux_clks, + mcd->num_mux_clks, node, + mcd->clk_lock, clk_data); + if (r) + goto unregister_factors; + }; + + if (mcd->composite_clks) { + /* We don't check composite_lock because it's optional */ + r = mtk_clk_register_composites(&pdev->dev, + mcd->composite_clks, + mcd->num_composite_clks, + base, mcd->clk_lock, clk_data); + if (r) + goto unregister_muxes; + } + + if (mcd->clks) { + r = mtk_clk_register_gates(&pdev->dev, node, mcd->clks, + mcd->num_clks, clk_data); + if (r) + goto unregister_composites; + } r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -481,9 +534,28 @@ int mtk_clk_simple_probe(struct platform_device *pdev) return r; unregister_clks: - mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); + if (mcd->clks) + mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); +unregister_composites: + if (mcd->composite_clks) + mtk_clk_unregister_composites(mcd->composite_clks, + mcd->num_composite_clks, clk_data); +unregister_muxes: + if (mcd->mux_clks) + mtk_clk_unregister_muxes(mcd->mux_clks, + mcd->num_mux_clks, clk_data); +unregister_factors: + if (mcd->factor_clks) + mtk_clk_unregister_factors(mcd->factor_clks, + mcd->num_factor_clks, clk_data); +unregister_fixed_clks: + if (mcd->fixed_clks) + mtk_clk_unregister_fixed_clks(mcd->fixed_clks, + mcd->num_fixed_clks, clk_data); free_data: mtk_free_clk_data(clk_data); + if (mcd->shared_io && base) + iounmap(base); return r; } EXPORT_SYMBOL_GPL(mtk_clk_simple_probe); @@ -495,7 +567,20 @@ int mtk_clk_simple_remove(struct platform_device *pdev) struct device_node *node = pdev->dev.of_node; of_clk_del_provider(node); - mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); + if (mcd->clks) + mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); + if (mcd->composite_clks) + mtk_clk_unregister_composites(mcd->composite_clks, + mcd->num_composite_clks, clk_data); + if (mcd->mux_clks) + mtk_clk_unregister_muxes(mcd->mux_clks, + mcd->num_mux_clks, clk_data); + if (mcd->factor_clks) + mtk_clk_unregister_factors(mcd->factor_clks, + mcd->num_factor_clks, clk_data); + if (mcd->fixed_clks) + mtk_clk_unregister_fixed_clks(mcd->fixed_clks, + mcd->num_fixed_clks, clk_data); mtk_free_clk_data(clk_data); return 0; diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 3993a60738c77..880b3d6d80119 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -196,7 +196,17 @@ void mtk_clk_unregister_ref2usb_tx(struct clk_hw *hw); struct mtk_clk_desc { const struct mtk_gate *clks; size_t num_clks; + const struct mtk_composite *composite_clks; + size_t num_composite_clks; + const struct mtk_fixed_clk *fixed_clks; + size_t num_fixed_clks; + const struct mtk_fixed_factor *factor_clks; + size_t num_factor_clks; + const struct mtk_mux *mux_clks; + size_t num_mux_clks; const struct mtk_clk_rst_desc *rst_desc; + spinlock_t *clk_lock; + bool shared_io; }; int mtk_clk_simple_probe(struct platform_device *pdev); -- GitLab From 165d226472575b213dd90dfda19d1605dd7c19a8 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Tue, 12 Mar 2024 19:51:55 +0800 Subject: [PATCH 1840/2290] clk: mediatek: Do a runtime PM get on controllers during probe [ Upstream commit 2f7b1d8b5505efb0057cd1ab85fca206063ea4c3 ] mt8183-mfgcfg has a mutual dependency with genpd during the probing stage, which leads to a deadlock in the following call stack: CPU0: genpd_lock --> clk_prepare_lock genpd_power_off_work_fn() genpd_lock() generic_pm_domain::power_off() clk_unprepare() clk_prepare_lock() CPU1: clk_prepare_lock --> genpd_lock clk_register() __clk_core_init() clk_prepare_lock() clk_pm_runtime_get() genpd_lock() Do a runtime PM get at the probe function to make sure clk_register() won't acquire the genpd lock. Instead of only modifying mt8183-mfgcfg, do this on all mediatek clock controller probings because we don't believe this would cause any regression. Verified on MT8183 and MT8192 Chromebooks. Fixes: acddfc2c261b ("clk: mediatek: Add MT8183 clock support") Signed-off-by: Pin-yen Lin Link: https://lore.kernel.org/r/20240312115249.3341654-1-treapking@chromium.org Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mtk.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index bfabd94a474a5..fa2c1b1c7dee4 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "clk-mtk.h" @@ -470,6 +471,16 @@ int mtk_clk_simple_probe(struct platform_device *pdev) return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM; } + + devm_pm_runtime_enable(&pdev->dev); + /* + * Do a pm_runtime_resume_and_get() to workaround a possible + * deadlock between clk_register() and the genpd framework. + */ + r = pm_runtime_resume_and_get(&pdev->dev); + if (r) + return r; + /* Calculate how many clk_hw_onecell_data entries to allocate */ num_clks = mcd->num_clks + mcd->num_composite_clks; num_clks += mcd->num_fixed_clks + mcd->num_factor_clks; @@ -531,6 +542,8 @@ int mtk_clk_simple_probe(struct platform_device *pdev) goto unregister_clks; } + pm_runtime_put(&pdev->dev); + return r; unregister_clks: @@ -556,6 +569,8 @@ free_data: mtk_free_clk_data(clk_data); if (mcd->shared_io && base) iounmap(base); + + pm_runtime_put(&pdev->dev); return r; } EXPORT_SYMBOL_GPL(mtk_clk_simple_probe); -- GitLab From d17075a93501062fb9f9582c899be455221c5633 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2024 11:10:33 -0700 Subject: [PATCH 1841/2290] x86/bugs: Fix BHI retpoline check [ Upstream commit 69129794d94c544810e68b2b4eaa7e44063f9bf2 ] Confusingly, X86_FEATURE_RETPOLINE doesn't mean retpolines are enabled, as it also includes the original "AMD retpoline" which isn't a retpoline at all. Also replace cpu_feature_enabled() with boot_cpu_has() because this is before alternatives are patched and cpu_feature_enabled()'s fallback path is slower than plain old boot_cpu_has(). Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Pawan Gupta Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lore.kernel.org/r/ad3807424a3953f0323c011a643405619f2a4927.1712944776.git.jpoimboe@kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/bugs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6d69123de3660..3f38592ec7713 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1629,7 +1629,8 @@ static void __init bhi_select_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { spec_ctrl_disable_kernel_rrsba(); if (rrsba_disabled) return; @@ -2783,11 +2784,13 @@ static const char *spectre_bhi_state(void) { if (!boot_cpu_has_bug(X86_BUG_BHI)) return "; BHI: Not affected"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) return "; BHI: BHI_DIS_S"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; - else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Vulnerable, KVM: SW loop"; -- GitLab From 943c3e45c80a8356a683bc54a1004d7efd8949f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 16 Apr 2024 23:04:34 -0700 Subject: [PATCH 1842/2290] x86/cpufeatures: Fix dependencies for GFNI, VAES, and VPCLMULQDQ [ Upstream commit 9543f6e26634537997b6e909c20911b7bf4876de ] Fix cpuid_deps[] to list the correct dependencies for GFNI, VAES, and VPCLMULQDQ. These features don't depend on AVX512, and there exist CPUs that support these features but not AVX512. GFNI actually doesn't even depend on AVX. This prevents GFNI from being unnecessarily disabled if AVX is disabled to mitigate the GDS vulnerability. This also prevents all three features from being unnecessarily disabled if AVX512VL (or its dependency AVX512F) were to be disabled, but it looks like there isn't any case where this happens anyway. Fixes: c128dbfa0f87 ("x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features") Signed-off-by: Eric Biggers Signed-off-by: Borislav Petkov (AMD) Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20240417060434.47101-1-ebiggers@kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/cpuid-deps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index c881bcafba7d7..9c19f40b1b272 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, -- GitLab From d05380576fb42adb427b1d5dd11f09fe33833b1a Mon Sep 17 00:00:00 2001 From: Ai Chao Date: Fri, 19 Apr 2024 16:21:59 +0800 Subject: [PATCH 1843/2290] ALSA: hda/realtek - Enable audio jacks of Haier Boyue G42 with ALC269VC commit 7ee5faad0f8c3ad86c8cfc2f6aac91d2ba29790f upstream. The Haier Boyue G42 with ALC269VC cannot detect the MIC of headset, the line out and internal speaker until ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS quirk applied. Signed-off-by: Ai Chao Cc: Message-ID: <20240419082159.476879-1-aichao@kylinos.cn> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e8cf38dc8a5e0..77c40063d63a7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10122,6 +10122,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), -- GitLab From a6d2a8b211c874971ee4cf3ddd167408177f6e76 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 30 Mar 2024 19:01:14 +0000 Subject: [PATCH 1844/2290] binder: check offset alignment in binder_get_object() commit aaef73821a3b0194a01bd23ca77774f704a04d40 upstream. Commit 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") introduced changes to how binder objects are copied. In doing so, it unintentionally removed an offset alignment check done through calls to binder_alloc_copy_from_buffer() -> check_buffer(). These calls were replaced in binder_get_object() with copy_from_user(), so now an explicit offset alignment check is needed here. This avoids later complications when unwinding the objects gets harder. It is worth noting this check existed prior to commit 7a67a39320df ("binder: add function to copy binder object from buffer"), likely removed due to redundancy at the time. Fixes: 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") Cc: stable@vger.kernel.org Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20240330190115.1877819-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 55cd17a13e758..8c2b7c074eca1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1707,8 +1707,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; -- GitLab From 38e10c9faa95ef37c927fdb19dbdc97153ce6150 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Fri, 1 Mar 2024 15:11:18 +0200 Subject: [PATCH 1845/2290] thunderbolt: Avoid notify PM core about runtime PM resume commit dcd12acaf384c30437fa5a9a1f71df06fc9835fd upstream. Currently we notify PM core about occurred wakes after any resume. This is not actually needed after resume from runtime suspend. Hence, notify PM core about occurred wakes only after resume from system sleep. Also, if the wake occurred in USB4 router upstream port, we don't notify the PM core about it since it is not actually needed and can cause unexpected autowake (e.g. if /sys/power/wakeup_count is used). While there add the missing kernel-doc for tb_switch_resume(). Signed-off-by: Gil Fine Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 27 +++++++++++++++++++++++++-- drivers/thunderbolt/tb.c | 4 ++-- drivers/thunderbolt/tb.h | 3 ++- drivers/thunderbolt/usb4.c | 13 +++++++------ 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 55698a0978f03..b4c01bc7120ab 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3135,7 +3135,26 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags) return tb_lc_set_wake(sw, flags); } -int tb_switch_resume(struct tb_switch *sw) +static void tb_switch_check_wakes(struct tb_switch *sw) +{ + if (device_may_wakeup(&sw->dev)) { + if (tb_switch_is_usb4(sw)) + usb4_switch_check_wakes(sw); + } +} + +/** + * tb_switch_resume() - Resume a switch after sleep + * @sw: Switch to resume + * @runtime: Is this resume from runtime suspend or system sleep + * + * Resumes and re-enumerates router (and all its children), if still plugged + * after suspend. Don't enumerate device router whose UID was changed during + * suspend. If this is resume from system sleep, notifies PM core about the + * wakes occurred during suspend. Disables all wakes, except USB4 wake of + * upstream port for USB4 routers that shall be always enabled. + */ +int tb_switch_resume(struct tb_switch *sw, bool runtime) { struct tb_port *port; int err; @@ -3184,6 +3203,9 @@ int tb_switch_resume(struct tb_switch *sw) if (err) return err; + if (!runtime) + tb_switch_check_wakes(sw); + /* Disable wakes */ tb_switch_set_wake(sw, 0); @@ -3213,7 +3235,8 @@ int tb_switch_resume(struct tb_switch *sw) */ if (tb_port_unlock(port)) tb_port_warn(port, "failed to unlock port\n"); - if (port->remote && tb_switch_resume(port->remote->sw)) { + if (port->remote && + tb_switch_resume(port->remote->sw, runtime)) { tb_port_warn(port, "lost during suspend, disconnecting\n"); tb_sw_set_unplugged(port->remote->sw); diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index e83269dc2b067..c5e4fa478e643 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1628,7 +1628,7 @@ static int tb_resume_noirq(struct tb *tb) /* remove any pci devices the firmware might have setup */ tb_switch_reset(tb->root_switch); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, false); tb_free_invalid_tunnels(tb); tb_free_unplugged_children(tb->root_switch); tb_restore_children(tb->root_switch); @@ -1754,7 +1754,7 @@ static int tb_runtime_resume(struct tb *tb) struct tb_tunnel *tunnel, *n; mutex_lock(&tb->lock); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, true); tb_free_invalid_tunnels(tb); tb_restore_children(tb->root_switch); list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index b3fec5f8e20cd..acf5b86208455 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -787,7 +787,7 @@ int tb_switch_configure(struct tb_switch *sw); int tb_switch_add(struct tb_switch *sw); void tb_switch_remove(struct tb_switch *sw); void tb_switch_suspend(struct tb_switch *sw, bool runtime); -int tb_switch_resume(struct tb_switch *sw); +int tb_switch_resume(struct tb_switch *sw, bool runtime); int tb_switch_reset(struct tb_switch *sw); int tb_switch_wait_for_bit(struct tb_switch *sw, u32 offset, u32 bit, u32 value, int timeout_msec); @@ -1182,6 +1182,7 @@ static inline struct tb_retimer *tb_to_retimer(struct device *dev) return NULL; } +void usb4_switch_check_wakes(struct tb_switch *sw); int usb4_switch_setup(struct tb_switch *sw); int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid); int usb4_switch_drom_read(struct tb_switch *sw, unsigned int address, void *buf, diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 3c821f5e44814..b0394ba6d111d 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -153,15 +153,18 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode, tx_dwords, rx_data, rx_dwords); } -static void usb4_switch_check_wakes(struct tb_switch *sw) +/** + * usb4_switch_check_wakes() - Check for wakes and notify PM core about them + * @sw: Router whose wakes to check + * + * Checks wakes occurred during suspend and notify the PM core about them. + */ +void usb4_switch_check_wakes(struct tb_switch *sw) { struct tb_port *port; bool wakeup = false; u32 val; - if (!device_may_wakeup(&sw->dev)) - return; - if (tb_route(sw)) { if (tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_6, 1)) return; @@ -226,8 +229,6 @@ int usb4_switch_setup(struct tb_switch *sw) u32 val = 0; int ret; - usb4_switch_check_wakes(sw); - if (!tb_route(sw)) return 0; -- GitLab From 9eae1facfc061802e045a49ae83ac1f0ff9900a3 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Fri, 1 Mar 2024 15:22:53 +0200 Subject: [PATCH 1846/2290] thunderbolt: Fix wake configurations after device unplug commit c38fa07dc69f0b9e6f43ecab96dc7861a70c827c upstream. Currently we don't configure correctly the wake events after unplug of device router. What can happen is that the downstream ports of host router will be configured to wake on: USB4-wake and wake-on-disconnect, but not on wake-on-connect. This may cause the later plugged device not to wake the domain and fail in enumeration. Fix this by clearing downstream port's "USB4 Port is Configured" bit, after unplug of a device router. Signed-off-by: Gil Fine Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index b4c01bc7120ab..d3058ede53064 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2880,22 +2880,29 @@ void tb_switch_unconfigure_link(struct tb_switch *sw) { struct tb_port *up, *down; - if (sw->is_unplugged) - return; if (!tb_route(sw) || tb_switch_is_icm(sw)) return; + /* + * Unconfigure downstream port so that wake-on-connect can be + * configured after router unplug. No need to unconfigure upstream port + * since its router is unplugged. + */ up = tb_upstream_port(sw); - if (tb_switch_is_usb4(up->sw)) - usb4_port_unconfigure(up); - else - tb_lc_unconfigure_port(up); - down = up->remote; if (tb_switch_is_usb4(down->sw)) usb4_port_unconfigure(down); else tb_lc_unconfigure_port(down); + + if (sw->is_unplugged) + return; + + up = tb_upstream_port(sw); + if (tb_switch_is_usb4(up->sw)) + usb4_port_unconfigure(up); + else + tb_lc_unconfigure_port(up); } static void tb_switch_credits_init(struct tb_switch *sw) -- GitLab From ac882d6b21bffecb57bcc4486701239eef5aa67b Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 8 Apr 2024 10:16:33 -0700 Subject: [PATCH 1847/2290] comedi: vmk80xx: fix incomplete endpoint checking commit d1718530e3f640b7d5f0050e725216eab57a85d8 upstream. While vmk80xx does have endpoint checking implemented, some things can fall through the cracks. Depending on the hardware model, URBs can have either bulk or interrupt type, and current version of vmk80xx_find_usb_endpoints() function does not take that fully into account. While this warning does not seem to be too harmful, at the very least it will crash systems with 'panic_on_warn' set on them. Fix the issue found by Syzkaller [1] by somewhat simplifying the endpoint checking process with usb_find_common_endpoints() and ensuring that only expected endpoint types are present. This patch has not been tested on real hardware. [1] Syzkaller report: usb 1-1: BOGUS urb xfer, pipe 1 != type 3 WARNING: CPU: 0 PID: 781 at drivers/usb/core/urb.c:504 usb_submit_urb+0xc4e/0x18c0 drivers/usb/core/urb.c:503 ... Call Trace: usb_start_wait_urb+0x113/0x520 drivers/usb/core/message.c:59 vmk80xx_reset_device drivers/comedi/drivers/vmk80xx.c:227 [inline] vmk80xx_auto_attach+0xa1c/0x1a40 drivers/comedi/drivers/vmk80xx.c:818 comedi_auto_config+0x238/0x380 drivers/comedi/drivers.c:1067 usb_probe_interface+0x5cd/0xb00 drivers/usb/core/driver.c:399 ... Similar issue also found by Syzkaller: Link: https://syzkaller.appspot.com/bug?extid=5205eb2f17de3e01946e Reported-and-tested-by: syzbot+5f29dc6a889fc42bd896@syzkaller.appspotmail.com Cc: stable Fixes: 49253d542cc0 ("staging: comedi: vmk80xx: factor out usb endpoint detection") Reviewed-by: Ian Abbott Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240408171633.31649-1-n.zhandarovich@fintech.ru Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/vmk80xx.c | 35 +++++++++++--------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c index 4536ed43f65b2..84dce5184a77a 100644 --- a/drivers/comedi/drivers/vmk80xx.c +++ b/drivers/comedi/drivers/vmk80xx.c @@ -641,33 +641,22 @@ static int vmk80xx_find_usb_endpoints(struct comedi_device *dev) struct vmk80xx_private *devpriv = dev->private; struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_host_interface *iface_desc = intf->cur_altsetting; - struct usb_endpoint_descriptor *ep_desc; - int i; - - if (iface_desc->desc.bNumEndpoints != 2) - return -ENODEV; - - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - ep_desc = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(ep_desc) || - usb_endpoint_is_bulk_in(ep_desc)) { - if (!devpriv->ep_rx) - devpriv->ep_rx = ep_desc; - continue; - } + struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc; + int ret; - if (usb_endpoint_is_int_out(ep_desc) || - usb_endpoint_is_bulk_out(ep_desc)) { - if (!devpriv->ep_tx) - devpriv->ep_tx = ep_desc; - continue; - } - } + if (devpriv->model == VMK8061_MODEL) + ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc, + &ep_tx_desc, NULL, NULL); + else + ret = usb_find_common_endpoints(iface_desc, NULL, NULL, + &ep_rx_desc, &ep_tx_desc); - if (!devpriv->ep_rx || !devpriv->ep_tx) + if (ret) return -ENODEV; + devpriv->ep_rx = ep_rx_desc; + devpriv->ep_tx = ep_tx_desc; + if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx)) return -EINVAL; -- GitLab From 2c9b943e9924cf1269e44289bc5e60e51b0f5270 Mon Sep 17 00:00:00 2001 From: Emil Kronborg Date: Wed, 20 Mar 2024 12:15:36 +0000 Subject: [PATCH 1848/2290] serial: mxs-auart: add spinlock around changing cts state commit 54c4ec5f8c471b7c1137a1f769648549c423c026 upstream. The uart_handle_cts_change() function in serial_core expects the caller to hold uport->lock. For example, I have seen the below kernel splat, when the Bluetooth driver is loaded on an i.MX28 board. [ 85.119255] ------------[ cut here ]------------ [ 85.124413] WARNING: CPU: 0 PID: 27 at /drivers/tty/serial/serial_core.c:3453 uart_handle_cts_change+0xb4/0xec [ 85.134694] Modules linked in: hci_uart bluetooth ecdh_generic ecc wlcore_sdio configfs [ 85.143314] CPU: 0 PID: 27 Comm: kworker/u3:0 Not tainted 6.6.3-00021-gd62a2f068f92 #1 [ 85.151396] Hardware name: Freescale MXS (Device Tree) [ 85.156679] Workqueue: hci0 hci_power_on [bluetooth] (...) [ 85.191765] uart_handle_cts_change from mxs_auart_irq_handle+0x380/0x3f4 [ 85.198787] mxs_auart_irq_handle from __handle_irq_event_percpu+0x88/0x210 (...) Cc: stable@vger.kernel.org Fixes: 4d90bb147ef6 ("serial: core: Document and assert lock requirements for irq helpers") Reviewed-by: Frank Li Signed-off-by: Emil Kronborg Link: https://lore.kernel.org/r/20240320121530.11348-1-emil.kronborg@protonmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index d21a4f3ef2fe6..8b31017e7e563 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1094,11 +1094,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port, static irqreturn_t mxs_auart_irq_handle(int irq, void *context) { - u32 istat; + u32 istat, stat; struct mxs_auart_port *s = context; u32 mctrl_temp = s->mctrl_prev; - u32 stat = mxs_read(s, REG_STAT); + uart_port_lock(&s->port); + + stat = mxs_read(s, REG_STAT); istat = mxs_read(s, REG_INTR); /* ack irq */ @@ -1134,6 +1136,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context) istat &= ~AUART_INTR_TXIS; } + uart_port_unlock(&s->port); + return IRQ_HANDLED; } -- GitLab From bbaafbb4651fede8d3c3881601ecaa4f834f9d3f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 8 Apr 2024 19:23:43 +1000 Subject: [PATCH 1849/2290] serial/pmac_zilog: Remove flawed mitigation for rx irq flood commit 1be3226445362bfbf461c92a5bcdb1723f2e4907 upstream. The mitigation was intended to stop the irq completely. That may be better than a hard lock-up but it turns out that you get a crash anyway if you're using pmac_zilog as a serial console: ttyPZ0: pmz: rx irq flood ! BUG: spinlock recursion on CPU#0, swapper/0 That's because the pr_err() call in pmz_receive_chars() results in pmz_console_write() attempting to lock a spinlock already locked in pmz_interrupt(). With CONFIG_DEBUG_SPINLOCK=y, this produces a fatal BUG splat. The spinlock in question is the one in struct uart_port. Even when it's not fatal, the serial port rx function ceases to work. Also, the iteration limit doesn't play nicely with QEMU, as can be seen in the bug report linked below. A web search for other reports of the error message "pmz: rx irq flood" didn't produce anything. So I don't think this code is needed any more. Remove it. Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Aneesh Kumar K.V Cc: Naveen N. Rao Cc: Andy Shevchenko Cc: stable@kernel.org Cc: linux-m68k@lists.linux-m68k.org Link: https://github.com/vivier/qemu-m68k/issues/44 Link: https://lore.kernel.org/all/1078874617.9746.36.camel@gaston/ Acked-by: Michael Ellerman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/e853cf2c762f23101cd2ddec0cc0c2be0e72685f.1712568223.git.fthain@linux-m68k.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pmac_zilog.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index fe2e4ec423f79..daf15d23bb42e 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -210,7 +210,6 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) { struct tty_port *port; unsigned char ch, r1, drop, flag; - int loops = 0; /* Sanity check, make sure the old bug is no longer happening */ if (uap->port.state == NULL) { @@ -291,24 +290,11 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) if (r1 & Rx_OVR) tty_insert_flip_char(port, 0, TTY_OVERRUN); next_char: - /* We can get stuck in an infinite loop getting char 0 when the - * line is in a wrong HW state, we break that here. - * When that happens, I disable the receive side of the driver. - * Note that what I've been experiencing is a real irq loop where - * I'm getting flooded regardless of the actual port speed. - * Something strange is going on with the HW - */ - if ((++loops) > 1000) - goto flood; ch = read_zsreg(uap, R0); if (!(ch & Rx_CH_AV)) break; } - return true; - flood: - pmz_interrupt_control(uap, 0); - pmz_error("pmz: rx irq flood !\n"); return true; } -- GitLab From 87d15af82d24f5193319028e0be1cd9fc34e8f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 17 Apr 2024 11:03:27 +0200 Subject: [PATCH 1850/2290] serial: stm32: Return IRQ_NONE in the ISR if no handling happend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13c785323b36b845300b256d0e5963c3727667d7 upstream. If there is a stuck irq that the handler doesn't address, returning IRQ_HANDLED unconditionally makes it impossible for the irq core to detect the problem and disable the irq. So only return IRQ_HANDLED if an event was handled. A stuck irq is still problematic, but with this change at least it only makes the UART nonfunctional instead of occupying the (usually only) CPU by 100% and so stall the whole machine. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Cc: stable@vger.kernel.org Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/5f92603d0dfd8a5b8014b2b10a902d91e0bb881f.1713344161.git.u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 2a9c4058824a8..ec2558ebb3158 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -755,6 +755,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; u32 sr; unsigned int size; + irqreturn_t ret = IRQ_NONE; sr = readl_relaxed(port->membase + ofs->isr); @@ -763,11 +764,14 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) (sr & USART_SR_TC)) { stm32_usart_tc_interrupt_disable(port); stm32_usart_rs485_rts_disable(port); + ret = IRQ_HANDLED; } - if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) + if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) { writel_relaxed(USART_ICR_RTOCF, port->membase + ofs->icr); + ret = IRQ_HANDLED; + } if ((sr & USART_SR_WUF) && ofs->icr != UNDEF_REG) { /* Clear wake up flag and disable wake up interrupt */ @@ -776,6 +780,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_WUFIE); if (irqd_is_wakeup_set(irq_get_irq_data(port->irq))) pm_wakeup_event(tport->tty->dev, 0); + ret = IRQ_HANDLED; } /* @@ -790,6 +795,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_unlock_and_check_sysrq(port); if (size) tty_flip_buffer_push(tport); + ret = IRQ_HANDLED; } } @@ -797,6 +803,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) spin_lock(&port->lock); stm32_usart_transmit_chars(port); spin_unlock(&port->lock); + ret = IRQ_HANDLED; } /* Receiver timeout irq for DMA RX */ @@ -806,9 +813,10 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_unlock_and_check_sysrq(port); if (size) tty_flip_buffer_push(tport); + ret = IRQ_HANDLED; } - return IRQ_HANDLED; + return ret; } static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl) -- GitLab From 282b223cfd5be01a28d08d90f724f0c3feab2797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 17 Apr 2024 11:03:28 +0200 Subject: [PATCH 1851/2290] serial: stm32: Reset .throttled state in .startup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ea2624b5b829b8f93c0dce25721d835969b34faf upstream. When an UART is opened that still has .throttled set from a previous open, the RX interrupt is enabled but the irq handler doesn't consider it. This easily results in a stuck irq with the effect to occupy the CPU in a tight loop. So reset the throttle state in .startup() to ensure that RX irqs are handled. Fixes: d1ec8a2eabe9 ("serial: stm32: update throttle and unthrottle ops for dma mode") Cc: stable@vger.kernel.org Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/a784f80d3414f7db723b2ec66efc56e1ad666cbf.1713344161.git.u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index ec2558ebb3158..7d11511c8c12a 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -1021,6 +1021,7 @@ static int stm32_usart_startup(struct uart_port *port) val |= USART_CR2_SWAP; writel_relaxed(val, port->membase + ofs->cr2); } + stm32_port->throttled = false; /* RX FIFO Flush */ if (ofs->rqr != UNDEF_REG) -- GitLab From 3c4ba8a6c5894a061f5eb851b5987898053608eb Mon Sep 17 00:00:00 2001 From: bolan wang Date: Wed, 6 Mar 2024 19:03:39 +0800 Subject: [PATCH 1852/2290] USB: serial: option: add Fibocom FM135-GL variants commit 356952b13af5b2c338df1e06889fd1b5e12cbbf4 upstream. Update the USB serial option driver support for the Fibocom FM135-GL LTE modules. - VID:PID 2cb7:0115, FM135-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x0115: mbim, diag, at, pipe Here are the outputs of usb-devices: T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0115 Rev=05.15 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom Module S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: bolan wang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1a3e5a9414f07..72b72bc4b417f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2272,6 +2272,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ -- GitLab From 3e34029b3c118e29c973d7869416b75529451a95 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Tue, 12 Mar 2024 14:29:12 +0800 Subject: [PATCH 1853/2290] USB: serial: option: add support for Fibocom FM650/FG650 commit fb1f4584b1215e8c209f6b3a4028ed8351a0e961 upstream. Fibocom FM650/FG650 are 5G modems with ECM/NCM/RNDIS/MBIM modes. This patch adds support to all 4 modes. In all 4 modes, the first serial port is the AT console while the other 3 appear to be diagnostic interfaces for dumping modem logs. usb-devices output for all modes: ECM: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a04 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms NCM: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a05 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms RNDIS: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 4 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a06 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms MBIM: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 7 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a07 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Chuanhong Guo Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 72b72bc4b417f..f05d94a9f067e 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2279,6 +2279,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ -- GitLab From 9eba0750255ac458016fe181e5c22ad8ee376fba Mon Sep 17 00:00:00 2001 From: Coia Prant Date: Mon, 15 Apr 2024 07:26:25 -0700 Subject: [PATCH 1854/2290] USB: serial: option: add Lonsung U8300/U9300 product commit cf16ffa17c398434a77b8a373e69287c95b60de2 upstream. Update the USB serial option driver to support Longsung U8300/U9300. For U8300 Interface 4 is used by for QMI interface in stock firmware of U8300, the router which uses U8300 modem. Interface 5 is used by for ADB interface in stock firmware of U8300, the router which uses U8300 modem. Interface mapping is: 0: unknown (Debug), 1: AT (Modem), 2: AT, 3: PPP (NDIS / Pipe), 4: QMI, 5: ADB T: Bus=05 Lev=01 Prnt=03 Port=02 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b05 Rev=03.18 S: Manufacturer=Android S: Product=Android C: #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms For U9300 Interface 1 is used by for ADB interface in stock firmware of U9300, the router which uses U9300 modem. Interface 4 is used by for QMI interface in stock firmware of U9300, the router which uses U9300 modem. Interface mapping is: 0: ADB, 1: AT (Modem), 2: AT, 3: PPP (NDIS / Pipe), 4: QMI Note: Interface 3 of some models of the U9300 series can send AT commands. T: Bus=05 Lev=01 Prnt=05 Port=04 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b3c Rev=03.18 S: Manufacturer=Android S: Product=Android C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms Tested successfully using Modem Manager on U9300. Tested successfully AT commands using If=1, If=2 and If=3 on U9300. Signed-off-by: Coia Prant Reviewed-by: Lars Melin [ johan: drop product defines, trim commit message ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f05d94a9f067e..a8cc7e452f0e0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2052,6 +2052,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff), .driver_info = RSVD(4) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */ + .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, -- GitLab From 25a299c566bf40a1315ebc705edcc47ffa12bf1b Mon Sep 17 00:00:00 2001 From: Jerry Meng Date: Mon, 15 Apr 2024 15:04:29 +0800 Subject: [PATCH 1855/2290] USB: serial: option: support Quectel EM060K sub-models commit c840244aba7ad2b83ed904378b36bd6aef25511c upstream. EM060K_129, EM060K_12a, EM060K_12b and EM0060K_12c are EM060K's sub-models, having the same name "Quectel EM060K-GL" and the same interface layout. MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0129 Rev= 5.04 S: Manufacturer=Quectel S: Product=Quectel EM060K-GL S: SerialNumber=f6fa08b6 C:* #Ifs= 8 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8f(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Jerry Meng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a8cc7e452f0e0..b3da923448481 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,10 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EM060K_128 0x0128 +#define QUECTEL_PRODUCT_EM060K_129 0x0129 +#define QUECTEL_PRODUCT_EM060K_12a 0x012a +#define QUECTEL_PRODUCT_EM060K_12b 0x012b +#define QUECTEL_PRODUCT_EM060K_12c 0x012c #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 @@ -1218,6 +1222,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, -- GitLab From 19f98f214bd6214a91f395541b29c3017c1a2515 Mon Sep 17 00:00:00 2001 From: Vanillan Wang Date: Tue, 16 Apr 2024 18:02:55 +0800 Subject: [PATCH 1856/2290] USB: serial: option: add Rolling RW101-GL and RW135-GL support commit 311f97a4c7c22a01f8897bddf00428dfd0668e79 upstream. Update the USB serial option driver support for the Rolling LTE modules. - VID:PID 33f8:01a2, RW101-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x01a2: mbim, diag, at, pipe - VID:PID 33f8:01a3, RW101-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x01a3: mbim, pipe - VID:PID 33f8:01a4, RW101-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x01a4: mbim, diag, at, pipe - VID:PID 33f8:0104, RW101-GL for laptop debug M.2 cards(with RMNET interface for /Linux/Chrome OS) 0x0104: RMNET, diag, at, pipe - VID:PID 33f8:0115, RW135-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x0115: MBIM, diag, at, pipe Here are the outputs of usb-devices: T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=01a2 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=01a3 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 17 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=01a4 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=33f8 ProdID=0104 Rev=05.04 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=ba2eb033 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=0115 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Vanillan Wang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b3da923448481..17be469837fc8 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2307,6 +2307,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ + { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, -- GitLab From 4ed7c7720aa0036c168add333e652c986de81b08 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 18 Apr 2024 13:34:30 +0200 Subject: [PATCH 1857/2290] USB: serial: option: add Telit FN920C04 rmnet compositions commit 582ee2f9d268d302595db3e36b985e5cbb93284d upstream. Add the following Telit FN920C04 compositions: 0x10a0: rmnet + tty (AT/NMEA) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a4: rmnet + tty (AT) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a4 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a9: rmnet + tty (AT) + tty (diag) + DPL (data packet logging) + adb T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a9 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniele Palmas Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 17be469837fc8..b5ee8518fcc78 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1376,6 +1376,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), -- GitLab From 8672ad663a22d0e4a325bb7d817b36ec412b967c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Apr 2024 16:33:28 +0200 Subject: [PATCH 1858/2290] Revert "usb: cdc-wdm: close race between read and workqueue" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1607830dadeefc407e4956336d9fcd9e9defd810 upstream. This reverts commit 339f83612f3a569b194680768b22bf113c26a29d. It has been found to cause problems in a number of Chromebook devices, so revert the change until it can be brought back in a safe way. Link: https://lore.kernel.org/r/385a3519-b45d-48c5-a6fd-a3fdb6bec92f@chromium.org Reported-by:: Aleksander Morgado Fixes: 339f83612f3a ("usb: cdc-wdm: close race between read and workqueue") Cc: stable Cc: Oliver Neukum Cc: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index fdc1a66b129a4..1f0951be15ab7 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -485,7 +485,6 @@ out_free_mem: static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; - int used; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) @@ -500,10 +499,7 @@ static int service_outstanding_interrupt(struct wdm_device *desc) goto out; } - used = test_and_set_bit(WDM_RESPONDING, &desc->flags); - if (used) - goto out; - + set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); -- GitLab From 9de10b59d16880a0a3ae2876c142fe54ce45d816 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 9 Apr 2024 12:27:54 +0000 Subject: [PATCH 1859/2290] usb: dwc2: host: Fix dereference issue in DDMA completion flow. commit eed04fa96c48790c1cce73c8a248e9d460b088f8 upstream. Fixed variable dereference issue in DDMA completion flow. Fixes: b258e4268850 ("usb: dwc2: host: Fix ISOC flow in DDMA mode") CC: stable@vger.kernel.org Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/2024040834-ethically-rumble-701f@gregkh/T/#m4c4b83bef0ebb4b67fe2e0a7d6466cbb6f416e39 Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/cc826d3ef53c934d8e6d98870f17f3cdc3d2755d.1712665387.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_ddma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 79582b102c7ed..994a78ad084b1 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -867,13 +867,15 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; u16 frame_desc_idx; - struct urb *usb_urb = qtd->urb->priv; + struct urb *usb_urb; u16 remain = 0; int rc = 0; if (!qtd->urb) return -EINVAL; + usb_urb = qtd->urb->priv; + dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx * sizeof(struct dwc2_dma_desc)), sizeof(struct dwc2_dma_desc), -- GitLab From a676b17edb52afe3b06eaa80fcb3f1dd2df7c460 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 5 Mar 2024 14:51:38 +0800 Subject: [PATCH 1860/2290] usb: Disable USB3 LPM at shutdown commit d920a2ed8620be04a3301e1a9c2b7cc1de65f19d upstream. SanDisks USB3 storage may disapper after system reboot: usb usb2-port3: link state change xhci_hcd 0000:00:14.0: clear port3 link state change, portsc: 0x2c0 usb usb2-port3: do warm reset, port only xhci_hcd 0000:00:14.0: xhci_hub_status_data: stopping usb2 port polling xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x2b0, return 0x2b0 usb usb2-port3: not warm reset yet, waiting 50ms xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x2f0, return 0x2f0 usb usb2-port3: not warm reset yet, waiting 200ms ... xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x6802c0, return 0x7002c0 usb usb2-port3: not warm reset yet, waiting 200ms xhci_hcd 0000:00:14.0: clear port3 reset change, portsc: 0x4802c0 xhci_hcd 0000:00:14.0: clear port3 warm(BH) reset change, portsc: 0x4002c0 xhci_hcd 0000:00:14.0: clear port3 link state change, portsc: 0x2c0 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x2c0, return 0x2c0 usb usb2-port3: not enabled, trying warm reset again... This is due to the USB device still cause port change event after xHCI is shuted down: xhci_hcd 0000:38:00.0: // Setting command ring address to 0xffffe001 xhci_hcd 0000:38:00.0: xhci_resume: starting usb3 port polling. xhci_hcd 0000:38:00.0: xhci_hub_status_data: stopping usb4 port polling xhci_hcd 0000:38:00.0: xhci_hub_status_data: stopping usb3 port polling xhci_hcd 0000:38:00.0: hcd_pci_runtime_resume: 0 xhci_hcd 0000:38:00.0: xhci_shutdown: stopping usb3 port polling. xhci_hcd 0000:38:00.0: // Halt the HC xhci_hcd 0000:38:00.0: xhci_shutdown completed - status = 1 xhci_hcd 0000:00:14.0: xhci_shutdown: stopping usb1 port polling. xhci_hcd 0000:00:14.0: // Halt the HC xhci_hcd 0000:00:14.0: xhci_shutdown completed - status = 1 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x1203, return 0x203 xhci_hcd 0000:00:14.0: set port reset, actual port 2-3 status = 0x1311 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x201203, return 0x100203 xhci_hcd 0000:00:14.0: clear port3 reset change, portsc: 0x1203 xhci_hcd 0000:00:14.0: clear port3 warm(BH) reset change, portsc: 0x1203 xhci_hcd 0000:00:14.0: clear port3 link state change, portsc: 0x1203 xhci_hcd 0000:00:14.0: clear port3 connect change, portsc: 0x1203 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x1203, return 0x203 usb 2-3: device not accepting address 2, error -108 xhci_hcd 0000:00:14.0: xHCI dying or halted, can't queue_command xhci_hcd 0000:00:14.0: Set port 2-3 link state, portsc: 0x1203, write 0x11261 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x1263, return 0x263 xhci_hcd 0000:00:14.0: set port reset, actual port 2-3 status = 0x1271 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x12b1, return 0x2b1 usb usb2-port3: not reset yet, waiting 60ms ACPI: PM: Preparing to enter system sleep state S5 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x12f1, return 0x2f1 usb usb2-port3: not reset yet, waiting 200ms reboot: Restarting system The port change event is caused by LPM transition, so disabling LPM at shutdown to make sure the device is in U0 for warmboot. Signed-off-by: Kai-Heng Feng Cc: stable Link: https://lore.kernel.org/r/20240305065140.66801-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index e91fa567d08d2..93a63b7f164d1 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -409,8 +409,10 @@ static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); - if (port_dev->child) + if (port_dev->child) { usb_disable_usb2_hardware_lpm(port_dev->child); + usb_unlocked_disable_lpm(port_dev->child); + } } static const struct dev_pm_ops usb_port_pm_ops = { -- GitLab From 0588bbbd718a8130b98c54518f1e0b569ce60a93 Mon Sep 17 00:00:00 2001 From: Norihiko Hama Date: Wed, 27 Mar 2024 11:35:50 +0900 Subject: [PATCH 1861/2290] usb: gadget: f_ncm: Fix UAF ncm object at re-bind after usb ep transport error commit 6334b8e4553cc69f51e383c9de545082213d785e upstream. When ncm function is working and then stop usb0 interface for link down, eth_stop() is called. At this piont, accidentally if usb transport error should happen in usb_ep_enable(), 'in_ep' and/or 'out_ep' may not be enabled. After that, ncm_disable() is called to disable for ncm unbind but gether_disconnect() is never called since 'in_ep' is not enabled. As the result, ncm object is released in ncm unbind but 'dev->port_usb' associated to 'ncm->port' is not NULL. And when ncm bind again to recover netdev, ncm object is reallocated but usb0 interface is already associated to previous released ncm object. Therefore, once usb0 interface is up and eth_start_xmit() is called, released ncm object is dereferrenced and it might cause use-after-free memory. [function unlink via configfs] usb0: eth_stop dev->port_usb=ffffff9b179c3200 --> error happens in usb_ep_enable(). NCM: ncm_disable: ncm=ffffff9b179c3200 --> no gether_disconnect() since ncm->port.in_ep->enabled is false. NCM: ncm_unbind: ncm unbind ncm=ffffff9b179c3200 NCM: ncm_free: ncm free ncm=ffffff9b179c3200 <-- released ncm [function link via configfs] NCM: ncm_alloc: ncm alloc ncm=ffffff9ac4f8a000 NCM: ncm_bind: ncm bind ncm=ffffff9ac4f8a000 NCM: ncm_set_alt: ncm=ffffff9ac4f8a000 alt=0 usb0: eth_open dev->port_usb=ffffff9b179c3200 <-- previous released ncm usb0: eth_start dev->port_usb=ffffff9b179c3200 <-- eth_start_xmit() --> dev->wrap() Unable to handle kernel paging request at virtual address dead00000000014f This patch addresses the issue by checking if 'ncm->netdev' is not NULL at ncm_disable() to call gether_disconnect() to deassociate 'dev->port_usb'. It's more reasonable to check 'ncm->netdev' to call gether_connect/disconnect rather than check 'ncm->port.in_ep->enabled' since it might not be enabled but the gether connection might be established. Signed-off-by: Norihiko Hama Cc: stable Link: https://lore.kernel.org/r/20240327023550.51214-1-Norihiko.Hama@alpsalpine.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 14601a2d25427..b267ed9dc6d99 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -884,7 +884,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (alt > 1) goto fail; - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { DBG(cdev, "reset ncm\n"); ncm->netdev = NULL; gether_disconnect(&ncm->port); @@ -1369,7 +1369,7 @@ static void ncm_disable(struct usb_function *f) DBG(cdev, "ncm deactivated\n"); - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { ncm->netdev = NULL; gether_disconnect(&ncm->port); } -- GitLab From 7c6f941492b7f1900dbf81c53058d3f848c39861 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 12 Mar 2024 07:19:58 +0200 Subject: [PATCH 1862/2290] mei: me: disable RPL-S on SPS and IGN firmwares commit 0dc04112bee6fdd6eb847ccb32214703022c0269 upstream. Extend the quirk to disable MEI interface on Intel PCH Ignition (IGN) and SPS firmwares for RPL-S devices. These firmwares do not support the MEI protocol. Fixes: 3ed8c7d39cfe ("mei: me: add raptor lake point S DID") Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240312051958.118478-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f8219cbd2c7ce..a617f64a351dc 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -116,7 +116,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, -- GitLab From 89af25bd4b4bf6a71295f07e07a8ae7dc03c6595 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 23 Mar 2024 17:48:43 +0100 Subject: [PATCH 1863/2290] speakup: Avoid crash on very long word commit c8d2f34ea96ea3bce6ba2535f867f0d4ee3b22e1 upstream. In case a console is set up really large and contains a really long word (> 256 characters), we have to stop before the length of the word buffer. Signed-off-by: Samuel Thibault Fixes: c6e3fd22cd538 ("Staging: add speakup to the staging directory") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240323164843.1426997-1-samuel.thibault@ens-lyon.org Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 73db0cb44fc7b..45d906f17ea3d 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -573,7 +573,7 @@ static u_long get_word(struct vc_data *vc) } attr_ch = get_char(vc, (u_short *)tmp_pos, &spk_attr); buf[cnt++] = attr_ch; - while (tmpx < vc->vc_cols - 1) { + while (tmpx < vc->vc_cols - 1 && cnt < sizeof(buf) - 1) { tmp_pos += 2; tmpx++; ch = get_char(vc, (u_short *)tmp_pos, &temp); -- GitLab From 5d43e072285e81b0b63cee7189b3357c7768a43b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Mar 2024 17:43:41 -0400 Subject: [PATCH 1864/2290] fs: sysfs: Fix reference leak in sysfs_break_active_protection() commit a90bca2228c0646fc29a72689d308e5fe03e6d78 upstream. The sysfs_break_active_protection() routine has an obvious reference leak in its error path. If the call to kernfs_find_and_get() fails then kn will be NULL, so the companion sysfs_unbreak_active_protection() routine won't get called (and would only cause an access violation by trying to dereference kn->parent if it was called). As a result, the reference to kobj acquired at the start of the function will never be released. Fix the leak by adding an explicit kobject_put() call when kn is NULL. Signed-off-by: Alan Stern Fixes: 2afc9166f79b ("scsi: sysfs: Introduce sysfs_{un,}break_active_protection()") Cc: Bart Van Assche Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Acked-by: Tejun Heo Link: https://lore.kernel.org/r/8a4d3f0f-c5e3-4b70-a188-0ca433f9e6f9@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index a12ac0356c69c..f21e73d107249 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -450,6 +450,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); + else + kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); -- GitLab From e487b8eccf67c0703299e977d2a6d8a0ce0a8eae Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 5 Apr 2024 16:55:54 -0700 Subject: [PATCH 1865/2290] KVM: x86: Snapshot if a vCPU's vendor model is AMD vs. Intel compatible commit fd706c9b1674e2858766bfbf7430534c2b26fbef upstream. Add kvm_vcpu_arch.is_amd_compatible to cache if a vCPU's vendor model is compatible with AMD, i.e. if the vCPU vendor is AMD or Hygon, along with helpers to check if a vCPU is compatible AMD vs. Intel. To handle Intel vs. AMD behavior related to masking the LVTPC entry, KVM will need to check for vendor compatibility on every PMI injection, i.e. querying for AMD will soon be a moderately hot path. Note! This subtly (or maybe not-so-subtly) makes "Intel compatible" KVM's default behavior, both if userspace omits (or never sets) CPUID 0x0 and if userspace sets a completely unknown vendor. One could argue that KVM should treat such vCPUs as not being compatible with Intel *or* AMD, but that would add useless complexity to KVM. KVM needs to do *something* in the face of vendor specific behavior, and so unless KVM conjured up a magic third option, choosing to treat unknown vendors as neither Intel nor AMD means that checks on AMD compatibility would yield Intel behavior, and checks for Intel compatibility would yield AMD behavior. And that's far worse as it would effectively yield random behavior depending on whether KVM checked for AMD vs. Intel vs. !AMD vs. !Intel. And practically speaking, all x86 CPUs follow either Intel or AMD architecture, i.e. "supporting" an unknown third architecture adds no value. Deliberately don't convert any of the existing guest_cpuid_is_intel() checks, as the Intel side of things is messier due to some flows explicitly checking for exactly vendor==Intel, versus some flows assuming anything that isn't "AMD compatible" gets Intel behavior. The Intel code will be cleaned up in the future. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20240405235603.1173076-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 1 + arch/x86/kvm/cpuid.h | 10 ++++++++++ arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/x86.c | 2 +- 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dfcdcafe3a2cd..887a171488ea2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -773,6 +773,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; u32 kvm_cpuid_base; + bool is_amd_compatible; u64 reserved_gpa_bits; int maxphyaddr; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 62a44455c51d0..f02961cbbb75a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -340,6 +340,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b1658c0de847c..18fd2e845989a 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -125,6 +125,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d30325e297a03..13134954e24df 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4649,7 +4649,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->cpu_role.base.level, is_efer_nx(context), guest_can_use_gbpages(vcpu), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f724765032bc4..a2ea636a23086 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3278,7 +3278,7 @@ static bool is_mci_status_msr(u32 msr) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; -- GitLab From 0fb74c00d140a66128afc0003785dcc57e69d312 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Mar 2024 16:58:33 -0800 Subject: [PATCH 1866/2290] KVM: x86/pmu: Disable support for adaptive PEBS commit 9e985cbf2942a1bb8fcef9adc2a17d90fd7ca8ee upstream. Drop support for virtualizing adaptive PEBS, as KVM's implementation is architecturally broken without an obvious/easy path forward, and because exposing adaptive PEBS can leak host LBRs to the guest, i.e. can leak host kernel addresses to the guest. Bug #1 is that KVM doesn't account for the upper 32 bits of IA32_FIXED_CTR_CTRL when (re)programming fixed counters, e.g fixed_ctrl_field() drops the upper bits, reprogram_fixed_counters() stores local variables as u8s and truncates the upper bits too, etc. Bug #2 is that, because KVM _always_ sets precise_ip to a non-zero value for PEBS events, perf will _always_ generate an adaptive record, even if the guest requested a basic record. Note, KVM will also enable adaptive PEBS in individual *counter*, even if adaptive PEBS isn't exposed to the guest, but this is benign as MSR_PEBS_DATA_CFG is guaranteed to be zero, i.e. the guest will only ever see Basic records. Bug #3 is in perf. intel_pmu_disable_fixed() doesn't clear the upper bits either, i.e. leaves ICL_FIXED_0_ADAPTIVE set, and intel_pmu_enable_fixed() effectively doesn't clear ICL_FIXED_0_ADAPTIVE either. I.e. perf _always_ enables ADAPTIVE counters, regardless of what KVM requests. Bug #4 is that adaptive PEBS *might* effectively bypass event filters set by the host, as "Updated Memory Access Info Group" records information that might be disallowed by userspace via KVM_SET_PMU_EVENT_FILTER. Bug #5 is that KVM doesn't ensure LBR MSRs hold guest values (or at least zeros) when entering a vCPU with adaptive PEBS, which allows the guest to read host LBRs, i.e. host RIPs/addresses, by enabling "LBR Entries" records. Disable adaptive PEBS support as an immediate fix due to the severity of the LBR leak in particular, and because fixing all of the bugs will be non-trivial, e.g. not suitable for backporting to stable kernels. Note! This will break live migration, but trying to make KVM play nice with live migration would be quite complicated, wouldn't be guaranteed to work (i.e. KVM might still kill/confuse the guest), and it's not clear that there are any publicly available VMMs that support adaptive PEBS, let alone live migrate VMs that support adaptive PEBS, e.g. QEMU doesn't support PEBS in any capacity. Link: https://lore.kernel.org/all/20240306230153.786365-1-seanjc@google.com Link: https://lore.kernel.org/all/ZeepGjHCeSfadANM@google.com Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Cc: stable@vger.kernel.org Cc: Like Xu Cc: Mingwei Zhang Cc: Zhenyu Wang Cc: Zhang Xiong Cc: Lv Zhiyuan Cc: Dapeng Mi Cc: Jim Mattson Acked-by: Like Xu Link: https://lore.kernel.org/r/20240307005833.827147-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5c1590855ffcd..10aff2c9a4e4c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7742,8 +7742,28 @@ static u64 vmx_get_perf_capabilities(void) if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; -- GitLab From e09465aeccf1b1209ecbdb1acd914d944a4625b5 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 5 Apr 2024 16:55:55 -0700 Subject: [PATCH 1867/2290] KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms commit 49ff3b4aec51e3abfc9369997cc603319b02af9a upstream. On AMD and Hygon platforms, the local APIC does not automatically set the mask bit of the LVTPC register when handling a PMI and there is no need to clear it in the kernel's PMI handler. For guests, the mask bit is currently set by kvm_apic_local_deliver() and unless it is cleared by the guest kernel's PMI handler, PMIs stop arriving and break use-cases like sampling with perf record. This does not affect non-PerfMonV2 guests because PMIs are handled in the guest kernel by x86_pmu_handle_irq() which always clears the LVTPC mask bit irrespective of the vendor. Before: $ perf record -e cycles:u true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (1 samples) ] After: $ perf record -e cycles:u true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (19 samples) ] Fixes: a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI") Cc: stable@vger.kernel.org Signed-off-by: Sandipan Das Reviewed-by: Jim Mattson [sean: use is_intel_compatible instead of !is_amd_or_hygon()] Signed-off-by: Sean Christopherson Message-ID: <20240405235603.1173076-3-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bfeafe4855528..c90fef0258c51 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2548,7 +2548,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); return r; } -- GitLab From f7e71a7cf399f53ff9fc314ca3836dc913b05bd6 Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Wed, 17 Apr 2024 10:52:48 +0800 Subject: [PATCH 1868/2290] arm64: hibernate: Fix level3 translation fault in swsusp_save() commit 50449ca66cc5a8cbc64749cf4b9f3d3fc5f4b457 upstream. On arm64 machines, swsusp_save() faults if it attempts to access MEMBLOCK_NOMAP memory ranges. This can be reproduced in QEMU using UEFI when booting with rodata=off debug_pagealloc=off and CONFIG_KFENCE=n: Unable to handle kernel paging request at virtual address ffffff8000000000 Mem abort info: ESR = 0x0000000096000007 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x07: level 3 translation fault Data abort info: ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=00000000eeb0b000 [ffffff8000000000] pgd=180000217fff9803, p4d=180000217fff9803, pud=180000217fff9803, pmd=180000217fff8803, pte=0000000000000000 Internal error: Oops: 0000000096000007 [#1] SMP Internal error: Oops: 0000000096000007 [#1] SMP Modules linked in: xt_multiport ipt_REJECT nf_reject_ipv4 xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c iptable_filter bpfilter rfkill at803x snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg dwmac_generic stmmac_platform snd_hda_codec stmmac joydev pcs_xpcs snd_hda_core phylink ppdev lp parport ramoops reed_solomon ip_tables x_tables nls_iso8859_1 vfat multipath linear amdgpu amdxcp drm_exec gpu_sched drm_buddy hid_generic usbhid hid radeon video drm_suballoc_helper drm_ttm_helper ttm i2c_algo_bit drm_display_helper cec drm_kms_helper drm CPU: 0 PID: 3663 Comm: systemd-sleep Not tainted 6.6.2+ #76 Source Version: 4e22ed63a0a48e7a7cff9b98b7806d8d4add7dc0 Hardware name: Greatwall GW-XXXXXX-XXX/GW-XXXXXX-XXX, BIOS KunLun BIOS V4.0 01/19/2021 pstate: 600003c5 (nZCv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : swsusp_save+0x280/0x538 lr : swsusp_save+0x280/0x538 sp : ffffffa034a3fa40 x29: ffffffa034a3fa40 x28: ffffff8000001000 x27: 0000000000000000 x26: ffffff8001400000 x25: ffffffc08113e248 x24: 0000000000000000 x23: 0000000000080000 x22: ffffffc08113e280 x21: 00000000000c69f2 x20: ffffff8000000000 x19: ffffffc081ae2500 x18: 0000000000000000 x17: 6666662074736420 x16: 3030303030303030 x15: 3038666666666666 x14: 0000000000000b69 x13: ffffff9f89088530 x12: 00000000ffffffea x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffffffc08193f0d0 x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 0000000000000001 x5 : ffffffa0fff09dc8 x4 : 0000000000000000 x3 : 0000000000000027 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 000000000000004e Call trace: swsusp_save+0x280/0x538 swsusp_arch_suspend+0x148/0x190 hibernation_snapshot+0x240/0x39c hibernate+0xc4/0x378 state_store+0xf0/0x10c kobj_attr_store+0x14/0x24 The reason is swsusp_save() -> copy_data_pages() -> page_is_saveable() -> kernel_page_present() assuming that a page is always present when can_set_direct_map() is false (all of rodata_full, debug_pagealloc_enabled() and arm64_kfence_can_set_direct_map() false), irrespective of the MEMBLOCK_NOMAP ranges. Such MEMBLOCK_NOMAP regions should not be saved during hibernation. This problem was introduced by changes to the pfn_valid() logic in commit a7d9f306ba70 ("arm64: drop pfn_valid_within() and simplify pfn_valid()"). Similar to other architectures, drop the !can_set_direct_map() check in kernel_page_present() so that page_is_savable() skips such pages. Fixes: a7d9f306ba70 ("arm64: drop pfn_valid_within() and simplify pfn_valid()") Cc: # 5.14.x Suggested-by: Mike Rapoport Suggested-by: Catalin Marinas Co-developed-by: xiongxin Signed-off-by: xiongxin Signed-off-by: Yaxiong Tian Acked-by: Mike Rapoport (IBM) Link: https://lore.kernel.org/r/20240417025248.386622-1-tianyaxiong@kylinos.cn [catalin.marinas@arm.com: rework commit message] Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/pageattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 826cb200b204f..425b398f8d456 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -220,9 +220,6 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - if (!can_set_direct_map()) - return true; - pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; -- GitLab From 76c2f4d426a5358fced5d5990744d46f10a4ccea Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Fri, 12 Apr 2024 16:17:32 +0800 Subject: [PATCH 1869/2290] init/main.c: Fix potential static_command_line memory overflow commit 46dad3c1e57897ab9228332f03e1c14798d2d3b9 upstream. We allocate memory of size 'xlen + strlen(boot_command_line) + 1' for static_command_line, but the strings copied into static_command_line are extra_command_line and command_line, rather than extra_command_line and boot_command_line. When strlen(command_line) > strlen(boot_command_line), static_command_line will overflow. This patch just recovers strlen(command_line) which was miss-consolidated with strlen(boot_command_line) in the commit f5c7310ac73e ("init/main: add checks for the return value of memblock_alloc*()") Link: https://lore.kernel.org/all/20240412081733.35925-2-ytcoode@gmail.com/ Fixes: f5c7310ac73e ("init/main: add checks for the return value of memblock_alloc*()") Cc: stable@vger.kernel.org Signed-off-by: Yuntao Wang Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- init/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/init/main.c b/init/main.c index ccde19e7275fa..2c339793511b5 100644 --- a/init/main.c +++ b/init/main.c @@ -633,6 +633,8 @@ static void __init setup_command_line(char *command_line) if (!saved_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); + len = xlen + strlen(command_line) + 1; + static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); -- GitLab From 5ef7ba2799a3b5ed292b8f6407376e2c25ef002e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sun, 7 Apr 2024 16:54:56 +0800 Subject: [PATCH 1870/2290] mm/memory-failure: fix deadlock when hugetlb_optimize_vmemmap is enabled commit 1983184c22dd84a4d95a71e5c6775c2638557dc7 upstream. When I did hard offline test with hugetlb pages, below deadlock occurs: ====================================================== WARNING: possible circular locking dependency detected 6.8.0-11409-gf6cef5f8c37f #1 Not tainted ------------------------------------------------------ bash/46904 is trying to acquire lock: ffffffffabe68910 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_slow_dec+0x16/0x60 but task is already holding lock: ffffffffabf92ea8 (pcp_batch_high_lock){+.+.}-{3:3}, at: zone_pcp_disable+0x16/0x40 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (pcp_batch_high_lock){+.+.}-{3:3}: __mutex_lock+0x6c/0x770 page_alloc_cpu_online+0x3c/0x70 cpuhp_invoke_callback+0x397/0x5f0 __cpuhp_invoke_callback_range+0x71/0xe0 _cpu_up+0xeb/0x210 cpu_up+0x91/0xe0 cpuhp_bringup_mask+0x49/0xb0 bringup_nonboot_cpus+0xb7/0xe0 smp_init+0x25/0xa0 kernel_init_freeable+0x15f/0x3e0 kernel_init+0x15/0x1b0 ret_from_fork+0x2f/0x50 ret_from_fork_asm+0x1a/0x30 -> #0 (cpu_hotplug_lock){++++}-{0:0}: __lock_acquire+0x1298/0x1cd0 lock_acquire+0xc0/0x2b0 cpus_read_lock+0x2a/0xc0 static_key_slow_dec+0x16/0x60 __hugetlb_vmemmap_restore_folio+0x1b9/0x200 dissolve_free_huge_page+0x211/0x260 __page_handle_poison+0x45/0xc0 memory_failure+0x65e/0xc70 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x387/0x550 ksys_write+0x64/0xe0 do_syscall_64+0xca/0x1e0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pcp_batch_high_lock); lock(cpu_hotplug_lock); lock(pcp_batch_high_lock); rlock(cpu_hotplug_lock); *** DEADLOCK *** 5 locks held by bash/46904: #0: ffff98f6c3bb23f0 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x64/0xe0 #1: ffff98f6c328e488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf8/0x1d0 #2: ffff98ef83b31890 (kn->active#113){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1d0 #3: ffffffffabf9db48 (mf_mutex){+.+.}-{3:3}, at: memory_failure+0x44/0xc70 #4: ffffffffabf92ea8 (pcp_batch_high_lock){+.+.}-{3:3}, at: zone_pcp_disable+0x16/0x40 stack backtrace: CPU: 10 PID: 46904 Comm: bash Kdump: loaded Not tainted 6.8.0-11409-gf6cef5f8c37f #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x68/0xa0 check_noncircular+0x129/0x140 __lock_acquire+0x1298/0x1cd0 lock_acquire+0xc0/0x2b0 cpus_read_lock+0x2a/0xc0 static_key_slow_dec+0x16/0x60 __hugetlb_vmemmap_restore_folio+0x1b9/0x200 dissolve_free_huge_page+0x211/0x260 __page_handle_poison+0x45/0xc0 memory_failure+0x65e/0xc70 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x387/0x550 ksys_write+0x64/0xe0 do_syscall_64+0xca/0x1e0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7fc862314887 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007fff19311268 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc862314887 RDX: 000000000000000c RSI: 000056405645fe10 RDI: 0000000000000001 RBP: 000056405645fe10 R08: 00007fc8623d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007fc86241b780 R14: 00007fc862417600 R15: 00007fc862416a00 In short, below scene breaks the lock dependency chain: memory_failure __page_handle_poison zone_pcp_disable -- lock(pcp_batch_high_lock) dissolve_free_huge_page __hugetlb_vmemmap_restore_folio static_key_slow_dec cpus_read_lock -- rlock(cpu_hotplug_lock) Fix this by calling drain_all_pages() instead. This issue won't occur until commit a6b40850c442 ("mm: hugetlb: replace hugetlb_free_vmemmap_enabled with a static_key"). As it introduced rlock(cpu_hotplug_lock) in dissolve_free_huge_page() code path while lock(pcp_batch_high_lock) is already in the __page_handle_poison(). [linmiaohe@huawei.com: extend comment per Oscar] [akpm@linux-foundation.org: reflow block comment] Link: https://lkml.kernel.org/r/20240407085456.2798193-1-linmiaohe@huawei.com Fixes: a6b40850c442 ("mm: hugetlb: replace hugetlb_free_vmemmap_enabled with a static_key") Signed-off-by: Miaohe Lin Acked-by: Oscar Salvador Reviewed-by: Jane Chu Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5b846ed5dcbe9..be58ce9992595 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -84,11 +84,23 @@ static int __page_handle_poison(struct page *page) { int ret; - zone_pcp_disable(page_zone(page)); + /* + * zone_pcp_disable() can't be used here. It will + * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold + * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap + * optimization is enabled. This will break current lock dependency + * chain and leads to deadlock. + * Disabling pcp before dissolving the page was a deterministic + * approach because we made sure that those pages cannot end up in any + * PCP list. Draining PCP lists expels those pages to the buddy system, + * but nothing guarantees that those pages do not get back to a PCP + * queue if we need to refill those. + */ ret = dissolve_free_huge_page(page); - if (!ret) + if (!ret) { + drain_all_pages(page_zone(page)); ret = take_page_off_buddy(page); - zone_pcp_enable(page_zone(page)); + } return ret; } -- GitLab From 212e3baccdb1939606420d88f7f52d346b49a284 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 11 Apr 2024 11:11:38 +0800 Subject: [PATCH 1871/2290] drm/amdgpu: validate the parameters of bo mapping operations more clearly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6fef2d4c00b5b8561ad68dd2b68173f5c6af1e75 upstream. Verify the parameters of amdgpu_vm_bo_(map/replace_map/clearing_mappings) in one common place. Fixes: dc54d3d1744d ("drm/amdgpu: implement AMDGPU_VA_OP_CLEAR v2") Cc: stable@vger.kernel.org Reported-by: Vlad Stolyarov Suggested-by: Christian König Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 ++++++++++++++++---------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4c661e024e13d..49a47807c42d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1400,6 +1400,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1426,21 +1457,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1493,17 +1517,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1517,7 +1533,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1604,10 +1620,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); -- GitLab From 8f79b42d1c213189a99c5de3e58081c205aaf47c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:11 -0400 Subject: [PATCH 1872/2290] drm/vmwgfx: Sort primary plane formats by order of preference commit d4c972bff3129a9dd4c22a3999fd8eba1a81531a upstream. The table of primary plane formats wasn't sorted at all, leading to applications picking our least desirable formats by defaults. Sort the primary plane formats according to our order of preference. Nice side-effect of this change is that it makes IGT's kms_atomic plane-invalid-params pass because the test picks the first format which for vmwgfx was DRM_FORMAT_XRGB1555 and uses fb's with odd sizes which make Pixman, which IGT depends on assert due to the fact that our 16bpp formats aren't 32 bit aligned like Pixman requires all formats to be. Signed-off-by: Zack Rusin Fixes: 36cc79bc9077 ("drm/vmwgfx: Add universal plane support") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-6-zack.rusin@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index b02d2793659f9..b116600b343a8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -246,10 +246,10 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { - DRM_FORMAT_XRGB1555, - DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, }; static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = { -- GitLab From bcff1ed2ff1933be34b3737e5534c6650cd4e6e1 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:10 -0400 Subject: [PATCH 1873/2290] drm/vmwgfx: Fix crtc's atomic check conditional commit a60ccade88f926e871a57176e86a34bbf0db0098 upstream. The conditional was supposed to prevent enabling of a crtc state without a set primary plane. Accidently it also prevented disabling crtc state with a set primary plane. Neither is correct. Fix the conditional and just driver-warn when a crtc state has been enabled without a primary plane which will help debug broken userspace. Fixes IGT's kms_atomic_interruptible and kms_atomic_transition tests. Signed-off-by: Zack Rusin Fixes: 06ec41909e31 ("drm/vmwgfx: Add and connect CRTC helper functions") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Reviewed-by: Ian Forbes Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-5-zack.rusin@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index aa571b75cd07f..b1aed051b41ab 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -793,6 +793,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct vmw_private *vmw = vmw_priv(crtc->dev); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc); @@ -800,9 +801,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, bool has_primary = new_state->plane_mask & drm_plane_mask(crtc->primary); - /* We always want to have an active plane with an active CRTC */ - if (has_primary != new_state->enable) - return -EINVAL; + /* + * This is fine in general, but broken userspace might expect + * some actual rendering so give a clue as why it's blank. + */ + if (new_state->enable && !has_primary) + drm_dbg_driver(&vmw->drm, + "CRTC without a primary plane will be blank.\n"); if (new_state->connector_mask != connector_mask && -- GitLab From ad74d208f213c06d860916ad40f609ade8c13039 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 11 Apr 2024 11:15:09 +1000 Subject: [PATCH 1874/2290] nouveau: fix instmem race condition around ptr stores commit fff1386cc889d8fb4089d285f883f8cba62d82ce upstream. Running a lot of VK CTS in parallel against nouveau, once every few hours you might see something like this crash. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 8000000114e6e067 P4D 8000000114e6e067 PUD 109046067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 53891 Comm: deqp-vk Not tainted 6.8.0-rc6+ #27 Hardware name: Gigabyte Technology Co., Ltd. Z390 I AORUS PRO WIFI/Z390 I AORUS PRO WIFI-CF, BIOS F8 11/05/2021 RIP: 0010:gp100_vmm_pgt_mem+0xe3/0x180 [nouveau] Code: c7 48 01 c8 49 89 45 58 85 d2 0f 84 95 00 00 00 41 0f b7 46 12 49 8b 7e 08 89 da 42 8d 2c f8 48 8b 47 08 41 83 c7 01 48 89 ee <48> 8b 40 08 ff d0 0f 1f 00 49 8b 7e 08 48 89 d9 48 8d 75 04 48 c1 RSP: 0000:ffffac20c5857838 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000004d8001 RCX: 0000000000000001 RDX: 00000000004d8001 RSI: 00000000000006d8 RDI: ffffa07afe332180 RBP: 00000000000006d8 R08: ffffac20c5857ad0 R09: 0000000000ffff10 R10: 0000000000000001 R11: ffffa07af27e2de0 R12: 000000000000001c R13: ffffac20c5857ad0 R14: ffffa07a96fe9040 R15: 000000000000001c FS: 00007fe395eed7c0(0000) GS:ffffa07e2c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000011febe001 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ... ? gp100_vmm_pgt_mem+0xe3/0x180 [nouveau] ? gp100_vmm_pgt_mem+0x37/0x180 [nouveau] nvkm_vmm_iter+0x351/0xa20 [nouveau] ? __pfx_nvkm_vmm_ref_ptes+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] ? __lock_acquire+0x3ed/0x2170 ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] nvkm_vmm_ptes_get_map+0xc2/0x100 [nouveau] ? __pfx_nvkm_vmm_ref_ptes+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] nvkm_vmm_map_locked+0x224/0x3a0 [nouveau] Adding any sort of useful debug usually makes it go away, so I hand wrote the function in a line, and debugged the asm. Every so often pt->memory->ptrs is NULL. This ptrs ptr is set in the nv50_instobj_acquire called from nvkm_kmap. If Thread A and Thread B both get to nv50_instobj_acquire around the same time, and Thread A hits the refcount_set line, and in lockstep thread B succeeds at refcount_inc_not_zero, there is a chance the ptrs value won't have been stored since refcount_set is unordered. Force a memory barrier here, I picked smp_mb, since we want it on all CPUs and it's write followed by a read. v2: use paired smp_rmb/smp_wmb. Cc: Fixes: be55287aa5ba ("drm/nouveau/imem/nv50: embed nvkm_instobj directly into nv04_instobj") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240411011510.2546857-1-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index c51bac76174c1..9fe5b6a36ab98 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -221,8 +221,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory) void __iomem *map = NULL; /* Already mapped? */ - if (refcount_inc_not_zero(&iobj->maps)) + if (refcount_inc_not_zero(&iobj->maps)) { + /* read barrier match the wmb on refcount set */ + smp_rmb(); return iobj->map; + } /* Take the lock, and re-check that another thread hasn't * already mapped the object in the meantime. @@ -249,6 +252,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; + /* barrier to ensure the ptrs are written before refcount is set */ + smp_wmb(); refcount_set(&iobj->maps, 1); } -- GitLab From 1e7feb31a18c197d63a5e606025ed63c762f8918 Mon Sep 17 00:00:00 2001 From: Qiang Zhang Date: Sun, 14 Apr 2024 19:49:45 +0800 Subject: [PATCH 1875/2290] bootconfig: use memblock_free_late to free xbc memory to buddy commit 89f9a1e876b5a7ad884918c03a46831af202c8a0 upstream. On the time to free xbc memory in xbc_exit(), memblock may has handed over memory to buddy allocator. So it doesn't make sense to free memory back to memblock. memblock_free() called by xbc_exit() even causes UAF bugs on architectures with CONFIG_ARCH_KEEP_MEMBLOCK disabled like x86. Following KASAN logs shows this case. This patch fixes the xbc memory free problem by calling memblock_free() in early xbc init error rewind path and calling memblock_free_late() in xbc exit path to free memory to buddy allocator. [ 9.410890] ================================================================== [ 9.418962] BUG: KASAN: use-after-free in memblock_isolate_range+0x12d/0x260 [ 9.426850] Read of size 8 at addr ffff88845dd30000 by task swapper/0/1 [ 9.435901] CPU: 9 PID: 1 Comm: swapper/0 Tainted: G U 6.9.0-rc3-00208-g586b5dfb51b9 #5 [ 9.446403] Hardware name: Intel Corporation RPLP LP5 (CPU:RaptorLake)/RPLP LP5 (ID:13), BIOS IRPPN02.01.01.00.00.19.015.D-00000000 Dec 28 2023 [ 9.460789] Call Trace: [ 9.463518] [ 9.465859] dump_stack_lvl+0x53/0x70 [ 9.469949] print_report+0xce/0x610 [ 9.473944] ? __virt_addr_valid+0xf5/0x1b0 [ 9.478619] ? memblock_isolate_range+0x12d/0x260 [ 9.483877] kasan_report+0xc6/0x100 [ 9.487870] ? memblock_isolate_range+0x12d/0x260 [ 9.493125] memblock_isolate_range+0x12d/0x260 [ 9.498187] memblock_phys_free+0xb4/0x160 [ 9.502762] ? __pfx_memblock_phys_free+0x10/0x10 [ 9.508021] ? mutex_unlock+0x7e/0xd0 [ 9.512111] ? __pfx_mutex_unlock+0x10/0x10 [ 9.516786] ? kernel_init_freeable+0x2d4/0x430 [ 9.521850] ? __pfx_kernel_init+0x10/0x10 [ 9.526426] xbc_exit+0x17/0x70 [ 9.529935] kernel_init+0x38/0x1e0 [ 9.533829] ? _raw_spin_unlock_irq+0xd/0x30 [ 9.538601] ret_from_fork+0x2c/0x50 [ 9.542596] ? __pfx_kernel_init+0x10/0x10 [ 9.547170] ret_from_fork_asm+0x1a/0x30 [ 9.551552] [ 9.555649] The buggy address belongs to the physical page: [ 9.561875] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x45dd30 [ 9.570821] flags: 0x200000000000000(node=0|zone=2) [ 9.576271] page_type: 0xffffffff() [ 9.580167] raw: 0200000000000000 ffffea0011774c48 ffffea0012ba1848 0000000000000000 [ 9.588823] raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 [ 9.597476] page dumped because: kasan: bad access detected [ 9.605362] Memory state around the buggy address: [ 9.610714] ffff88845dd2ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 9.618786] ffff88845dd2ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 9.626857] >ffff88845dd30000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 9.634930] ^ [ 9.638534] ffff88845dd30080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 9.646605] ffff88845dd30100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 9.654675] ================================================================== Link: https://lore.kernel.org/all/20240414114944.1012359-1-qiang4.zhang@linux.intel.com/ Fixes: 40caa127f3c7 ("init: bootconfig: Remove all bootconfig data when the init memory is removed") Cc: Stable@vger.kernel.org Signed-off-by: Qiang Zhang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/bootconfig.h | 7 ++++++- lib/bootconfig.c | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index ca73940e26df8..4195444ec45d1 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -287,7 +287,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_exit(void); +void __init _xbc_exit(bool early); + +static inline void xbc_exit(void) +{ + _xbc_exit(false); +} /* XBC embedded bootconfig data in kernel */ #ifdef CONFIG_BOOT_CONFIG_EMBED diff --git a/lib/bootconfig.c b/lib/bootconfig.c index c59d26068a640..8841554432d5b 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -61,9 +61,12 @@ static inline void * __init xbc_alloc_mem(size_t size) return memblock_alloc(size, SMP_CACHE_BYTES); } -static inline void __init xbc_free_mem(void *addr, size_t size) +static inline void __init xbc_free_mem(void *addr, size_t size, bool early) { - memblock_free(addr, size); + if (early) + memblock_free(addr, size); + else if (addr) + memblock_free_late(__pa(addr), size); } #else /* !__KERNEL__ */ @@ -73,7 +76,7 @@ static inline void *xbc_alloc_mem(size_t size) return malloc(size); } -static inline void xbc_free_mem(void *addr, size_t size) +static inline void xbc_free_mem(void *addr, size_t size, bool early) { free(addr); } @@ -904,13 +907,13 @@ static int __init xbc_parse_tree(void) * If you need to reuse xbc_init() with new boot config, you can * use this. */ -void __init xbc_exit(void) +void __init _xbc_exit(bool early) { - xbc_free_mem(xbc_data, xbc_data_size); + xbc_free_mem(xbc_data, xbc_data_size, early); xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early); xbc_nodes = NULL; brace_index = 0; } @@ -963,7 +966,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) if (!xbc_nodes) { if (emsg) *emsg = "Failed to allocate bootconfig nodes"; - xbc_exit(); + _xbc_exit(true); return -ENOMEM; } memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); @@ -977,7 +980,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) *epos = xbc_err_pos; if (emsg) *emsg = xbc_err_msg; - xbc_exit(); + _xbc_exit(true); } else ret = xbc_node_num; -- GitLab From 897ac5306bbeb83e90c437326f7044c79a17c611 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 16 Apr 2024 03:20:48 +0900 Subject: [PATCH 1876/2290] nilfs2: fix OOB in nilfs_set_de_type commit c4a7dc9523b59b3e73fd522c73e95e072f876b16 upstream. The size of the nilfs_type_by_mode array in the fs/nilfs2/dir.c file is defined as "S_IFMT >> S_SHIFT", but the nilfs_set_de_type() function, which uses this array, specifies the index to read from the array in the same way as "(mode & S_IFMT) >> S_SHIFT". static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; // oob } However, when the index is determined this way, an out-of-bounds (OOB) error occurs by referring to an index that is 1 larger than the array size when the condition "mode & S_IFMT == S_IFMT" is satisfied. Therefore, a patch to resize the nilfs_type_by_mode array should be applied to prevent OOB errors. Link: https://lkml.kernel.org/r/20240415182048.7144-1-konishi.ryusuke@gmail.com Reported-by: syzbot+2e22057de05b9f3b30d8@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2e22057de05b9f3b30d8 Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Jeongjun Park Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index decd6471300b0..760405da852f6 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -243,7 +243,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = { #define S_SHIFT 12 static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { +nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, -- GitLab From 013c787d231188a6408e2991150d3c9bf9a2aa0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:44 +0300 Subject: [PATCH 1877/2290] net: dsa: mt7530: set all CPU ports in MT7531_CPU_PMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ff221029a51fd54cacac66e193e0c75e4de940e7 upstream. MT7531_CPU_PMAP represents the destination port mask for trapped-to-CPU frames (further restricted by PCR_MATRIX). Currently the driver sets the first CPU port as the single port in this bit mask, which works fine regardless of whether the device tree defines port 5, 6 or 5+6 as CPU ports. This is because the logic coincides with DSA's logic of picking the first CPU port as the CPU port that all user ports are affine to, by default. An upcoming change would like to influence DSA's selection of the default CPU port to no longer be the first one, and in that case, this logic needs adaptation. Since there is no observed leakage or duplication of frames if all CPU ports are defined in this bit mask, simply include them all. Suggested-by: Russell King (Oracle) Suggested-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Arınç ÜNAL Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 15 +++++++-------- drivers/net/dsa/mt7530.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b5f61a9a378eb..b5c36d220f28d 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1236,6 +1236,13 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) if (priv->id == ID_MT7530 || priv->id == ID_MT7621) mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port)); + /* Add the CPU port to the CPU port bitmap for MT7531. Trapped frames + * will be forwarded to the CPU port that is affine to the inbound user + * port. + */ + if (priv->id == ID_MT7531) + mt7530_set(priv, MT7531_CFC, MT7531_CPU_PMAP(BIT(port))); + /* CPU port gets connected to all user ports of * the switch. */ @@ -2534,16 +2541,8 @@ static int mt7531_setup_common(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - struct dsa_port *cpu_dp; int ret, i; - /* BPDU to CPU port */ - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, - BIT(cpu_dp->index)); - break; - } - mt753x_trap_frames(priv); /* Enable and reset MIB counters */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index af18f47f22141..206966e46e817 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -57,6 +57,7 @@ enum mt753x_id { #define MT7531_MIRROR_PORT_GET(x) (((x) >> 16) & MIRROR_MASK) #define MT7531_MIRROR_PORT_SET(x) (((x) & MIRROR_MASK) << 16) #define MT7531_CPU_PMAP_MASK GENMASK(7, 0) +#define MT7531_CPU_PMAP(x) FIELD_PREP(MT7531_CPU_PMAP_MASK, x) #define MT753X_MIRROR_REG(id) (((id) == ID_MT7531) ? \ MT7531_CFC : MT7530_MFC) -- GitLab From d9c2f69cc12ce8d37e2f94a033e0c2c0bdedf558 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 17 Jun 2023 09:26:48 +0300 Subject: [PATCH 1878/2290] net: dsa: introduce preferred_default_local_cpu_port and use on MT7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b79d7c14f48083abb3fb061370c0c64a569edf4c upstream. Since the introduction of the OF bindings, DSA has always had a policy that in case multiple CPU ports are present in the device tree, the numerically smallest one is always chosen. The MT7530 switch family, except the switch on the MT7988 SoC, has 2 CPU ports, 5 and 6, where port 6 is preferable on the MT7531BE switch because it has higher bandwidth. The MT7530 driver developers had 3 options: - to modify DSA when the MT7531 switch support was introduced, such as to prefer the better port - to declare both CPU ports in device trees as CPU ports, and live with the sub-optimal performance resulting from not preferring the better port - to declare just port 6 in the device tree as a CPU port Of course they chose the path of least resistance (3rd option), kicking the can down the road. The hardware description in the device tree is supposed to be stable - developers are not supposed to adopt the strategy of piecemeal hardware description, where the device tree is updated in lockstep with the features that the kernel currently supports. Now, as a result of the fact that they did that, any attempts to modify the device tree and describe both CPU ports as CPU ports would make DSA change its default selection from port 6 to 5, effectively resulting in a performance degradation visible to users with the MT7531BE switch as can be seen below. Without preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 374 MBytes 157 Mbits/sec 734 sender [ 5][TX-C] 0.00-20.00 sec 373 MBytes 156 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 778 Mbits/sec 0 sender [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 777 Mbits/sec receiver With preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 856 Mbits/sec 273 sender [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 855 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.72 GBytes 737 Mbits/sec 15 sender [ 7][RX-C] 0.00-20.00 sec 1.71 GBytes 736 Mbits/sec receiver Using one port for WAN and the other ports for LAN is a very popular use case which is what this test emulates. As such, this change proposes that we retroactively modify stable kernels (which don't support the modification of the CPU port assignments, so as to let user space fix the problem and restore the throughput) to keep the mt7530 driver preferring port 6 even with device trees where the hardware is more fully described. Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Signed-off-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Arınç ÜNAL Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 15 +++++++++++++++ include/net/dsa.h | 8 ++++++++ net/dsa/dsa2.c | 24 +++++++++++++++++++++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b5c36d220f28d..05f5f0aa8b11d 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -419,6 +419,20 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } +/* If port 6 is available as a CPU port, always prefer that as the default, + * otherwise don't care. + */ +static struct dsa_port * +mt753x_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp = dsa_to_port(ds, 6); + + if (dsa_port_is_cpu(cpu_dp)) + return cpu_dp; + + return NULL; +} + /* Setup port 6 interface mode and TRGMII TX circuit */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) @@ -3405,6 +3419,7 @@ static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, .get_sset_count = mt7530_get_sset_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index ee369670e20e4..f96b61d9768e0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -968,6 +968,14 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + /* + * Compatibility between device trees defining multiple CPU ports and + * drivers which are not OK to use by default the numerically smallest + * CPU port of a switch for its local ports. This can return NULL, + * meaning "don't know/don't care". + */ + struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds); + /* * Port's MAC EEE settings */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5417f7b1187cb..98f8648791755 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -425,6 +425,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -432,12 +450,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + /* Prefer a local CPU port */ dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ -- GitLab From 41a004ffba9b1fd8a5a7128ebd0dfa3ed39c3316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Wed, 20 Mar 2024 23:45:30 +0300 Subject: [PATCH 1879/2290] net: dsa: mt7530: fix improper frames on all 25MHz and 40MHz XTAL MT7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5f563c31ff0c40ce395d0bae7daa94c7950dac97 upstream. The MT7530 switch after reset initialises with a core clock frequency that works with a 25MHz XTAL connected to it. For 40MHz XTAL, the core clock frequency must be set to 500MHz. The mt7530_pll_setup() function is responsible of setting the core clock frequency. Currently, it runs on MT7530 with 25MHz and 40MHz XTAL. This causes MT7530 switch with 25MHz XTAL to egress and ingress frames improperly. Introduce a check to run it only on MT7530 with 40MHz XTAL. The core clock frequency is set by writing to a switch PHY's register. Access to the PHY's register is done via the MDIO bus the switch is also on. Therefore, it works only when the switch makes switch PHYs listen on the MDIO bus the switch is on. This is controlled either by the state of the ESW_P1_LED_1 pin after reset deassertion or modifying bit 5 of the modifiable trap register. When ESW_P1_LED_1 is pulled high, PHY indirect access is used. That means accessing PHY registers via the PHY indirect access control register of the switch. When ESW_P1_LED_1 is pulled low, PHY direct access is used. That means accessing PHY registers via the MDIO bus the switch is on. For MT7530 switch with 40MHz XTAL on a board with ESW_P1_LED_1 pulled high, the core clock frequency won't be set to 500MHz, causing the switch to egress and ingress frames improperly. Run mt7530_pll_setup() after PHY direct access is set on the modifiable trap register. With these two changes, all MT7530 switches with 25MHz and 40MHz, and P1_LED_1 pulled high or low, will egress and ingress frames properly. Link: https://github.com/BPI-SINOVOIP/BPI-R2-bsp/blob/4a5dd143f2172ec97a2872fa29c7c4cd520f45b5/linux-mt/drivers/net/ethernet/mediatek/gsw_mt7623.c#L1039 Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20240320-for-net-mt7530-fix-25mhz-xtal-with-direct-phy-access-v1-1-d92f605f1160@arinc9.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 05f5f0aa8b11d..63c738c672739 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2434,8 +2434,6 @@ mt7530_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); - mt7530_pll_setup(priv); - /* Lower Tx driving for TRGMII path */ for (i = 0; i < NUM_TRGMII_CTRL; i++) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), @@ -2453,6 +2451,9 @@ mt7530_setup(struct dsa_switch *ds) priv->p6_interface = PHY_INTERFACE_MODE_NA; + if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ) + mt7530_pll_setup(priv); + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ -- GitLab From 7d51db455ca03e5270cc585a75a674abd063fa6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 8 Apr 2024 10:08:53 +0300 Subject: [PATCH 1880/2290] net: dsa: mt7530: fix enabling EEE on MT7531 switch on all boards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 06dfcd4098cfdc4d4577d94793a4f9125386da8b upstream. The commit 40b5d2f15c09 ("net: dsa: mt7530: Add support for EEE features") brought EEE support but did not enable EEE on MT7531 switch MACs. EEE is enabled on MT7531 switch MACs by pulling the LAN2LED0 pin low on the board (bootstrapping), unsetting the EEE_DIS bit on the trap register, or setting the internal EEE switch bit on the CORE_PLL_GROUP4 register. Thanks to SkyLake Huang (黃啟澤) from MediaTek for providing information on the internal EEE switch bit. There are existing boards that were not designed to pull the pin low. Because of that, the EEE status currently depends on the board design. The EEE_DIS bit on the trap pertains to the LAN2LED0 pin which is usually used to control an LED. Once the bit is unset, the pin will be low. That will make the active low LED turn on. The pin is controlled by the switch PHY. It seems that the PHY controls the pin in the way that it inverts the pin state. That means depending on the wiring of the LED connected to LAN2LED0 on the board, the LED may be on without an active link. To not cause this unwanted behaviour whilst enabling EEE on all boards, set the internal EEE switch bit on the CORE_PLL_GROUP4 register. My testing on MT7531 shows a certain amount of traffic loss when EEE is enabled. That said, I haven't come across a board that enables EEE. So enable EEE on the switch MACs but disable EEE advertisement on the switch PHYs. This way, we don't change the behaviour of the majority of the boards that have this switch. The mediatek-ge PHY driver already disables EEE advertisement on the switch PHYs but my testing shows that it is somehow enabled afterwards. Disabling EEE advertisement before the PHY driver initialises keeps it off. With this change, EEE can now be enabled using ethtool. Fixes: 40b5d2f15c09 ("net: dsa: mt7530: Add support for EEE features") Reviewed-by: Florian Fainelli Signed-off-by: Arınç ÜNAL Tested-by: Daniel Golle Reviewed-by: Daniel Golle Link: https://lore.kernel.org/r/20240408-for-net-mt7530-fix-eee-for-mt7531-mt7988-v3-1-84fdef1f008b@arinc9.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 17 ++++++++++++----- drivers/net/dsa/mt7530.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 63c738c672739..1aba0cf38630f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2675,18 +2675,25 @@ mt7531_setup(struct dsa_switch *ds) priv->p5_interface = PHY_INTERFACE_MODE_NA; priv->p6_interface = PHY_INTERFACE_MODE_NA; - /* Enable PHY core PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we provide - * our own mt7531_ind_mmd_phy_[read,write] to complete this - * function. + /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since + * phy_device has not yet been created provided for + * phy_[read,write]_mmd_indirect is called, we provide our own + * mt7531_ind_mmd_phy_[read,write] to complete this function. */ val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4); - val |= MT7531_PHY_PLL_BYPASS_MODE; + val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE; val &= ~MT7531_PHY_PLL_OFF; mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4, val); + /* Disable EEE advertisement on the switch PHYs. */ + for (i = MT753X_CTRL_PHY_ADDR; + i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) { + mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV, + 0); + } + mt7531_setup_common(ds); /* Setup VLAN ID 0 for VLAN-unaware bridges */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 206966e46e817..6441e8d7f05d9 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -673,6 +673,7 @@ enum mt7531_clk_skew { #define RG_SYSPLL_DDSFBK_EN BIT(12) #define RG_SYSPLL_BIAS_EN BIT(11) #define RG_SYSPLL_BIAS_LPF_EN BIT(10) +#define MT7531_RG_SYSPLL_DMY2 BIT(6) #define MT7531_PHY_PLL_OFF BIT(5) #define MT7531_PHY_PLL_BYPASS_MODE BIT(4) -- GitLab From b80ba648714e6d790d69610cf14656be222d0248 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 11 Apr 2024 23:02:15 +0900 Subject: [PATCH 1881/2290] ksmbd: fix slab-out-of-bounds in smb2_allocate_rsp_buf commit c119f4ede3fa90a9463f50831761c28f989bfb20 upstream. If ->ProtocolId is SMB2_TRANSFORM_PROTO_NUM, smb2 request size validation could be skipped. if request size is smaller than sizeof(struct smb2_query_info_req), slab-out-of-bounds read can happen in smb2_allocate_rsp_buf(). This patch allocate response buffer after decrypting transform request. smb3_decrypt_req() will validate transform request size and avoid slab-out-of-bound in smb2_allocate_rsp_buf(). Reported-by: Norbert Szetei Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/server.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 11b201e6ee44b..63b01f7d97031 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ -- GitLab From 21ff9d7d223c5c19cb4334009e4c0c83a2f4d674 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 12 Apr 2024 09:45:00 +0900 Subject: [PATCH 1882/2290] ksmbd: validate request buffer size in smb2_allocate_rsp_buf() commit 17cf0c2794bdb6f39671265aa18aea5c22ee8c4a upstream. The response buffer should be allocated in smb2_allocate_rsp_buf before validating request. But the fields in payload as well as smb2 header is used in smb2_allocate_rsp_buf(). This patch add simple buffer size validation to avoid potencial out-of-bounds in request buffer. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index c02b1772cb807..34d88425434ab 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -534,6 +534,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || -- GitLab From 4687606d94deaccce06c64655ee82989acff5512 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Mon, 15 Apr 2024 15:12:48 +0200 Subject: [PATCH 1883/2290] ksmbd: clear RENAME_NOREPLACE before calling vfs_rename commit 4973b04d3ea577db80c501c5f14e68ec69fe1794 upstream. File overwrite case is explicitly handled, so it is not necessary to pass RENAME_NOREPLACE to vfs_rename. Clearing the flag fixes rename operations when the share is a ntfs-3g mount. The latter uses an older version of fuse with no support for flags in the ->rename op. Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index fe2c80ea2e47e..a4c99ec38faca 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -746,10 +746,15 @@ retry: goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; -- GitLab From 67a877128bbe6a5c3282ad1ed8df5ba2cc93b55a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 19 Apr 2024 23:46:34 +0900 Subject: [PATCH 1884/2290] ksmbd: common: use struct_group_attr instead of struct_group for network_open_info commit 0268a7cc7fdc47d90b6c18859de7718d5059f6f1 upstream. 4byte padding cause the connection issue with the applications of MacOS. smb2_close response size increases by 4 bytes by padding, And the smb client of MacOS check it and stop the connection. This patch use struct_group_attr instead of struct_group for network_open_info to use __packed to avoid padding. Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/common/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index a3936ff53d9d0..25383b11d01b9 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -699,7 +699,7 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; -- GitLab From 0f7908a016c092cfdaa16d785fa5099d867bc1a3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Jan 2024 11:02:43 +0100 Subject: [PATCH 1885/2290] PCI/ASPM: Fix deadlock when enabling ASPM commit 1e560864159d002b453da42bd2c13a1805515a20 upstream. A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Helgaas Cc: # 6.7 [bhelgaas: backported to v6.1.y, which contains b9c370b61d73 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()""), a backport of f93e71aea6c6. This omits the drivers/pci/controller/dwc/pcie-qcom.c hunk that updates qcom_pcie_enable_aspm(), which was added by 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops"), which is not present in v6.1.87.] Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/bus.c | 49 +++++++++++++++++--------- drivers/pci/pci.c | 78 +++++++++++++++++++++++++++-------------- drivers/pci/pci.h | 4 +-- drivers/pci/pcie/aspm.c | 13 ++++--- include/linux/pci.h | 5 +++ 5 files changed, 100 insertions(+), 49 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index feafa378bf8ea..aa2fba1c0f567 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -379,21 +379,8 @@ void pci_bus_add_devices(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_add_devices); -/** pci_walk_bus - walk devices on/under bus, calling callback. - * @top bus whose devices should be walked - * @cb callback to be called for each device found - * @userdata arbitrary pointer to be passed to callback. - * - * Walk the given bus, including any bridged devices - * on buses under this bus. Call the provided callback - * on each device found. - * - * We check the return of @cb each time. If it returns anything - * other than 0, we break out. - * - */ -void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata) +static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata, bool locked) { struct pci_dev *dev; struct pci_bus *bus; @@ -401,7 +388,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), int retval; bus = top; - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); next = top->devices.next; for (;;) { if (next == &bus->devices) { @@ -424,10 +412,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), if (retval) break; } - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); +} + +/** + * pci_walk_bus - walk devices on/under bus, calling callback. + * @top: bus whose devices should be walked + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * Walk the given bus, including any bridged devices + * on buses under this bus. Call the provided callback + * on each device found. + * + * We check the return of @cb each time. If it returns anything + * other than 0, we break out. + */ +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + __pci_walk_bus(top, cb, userdata, false); } EXPORT_SYMBOL_GPL(pci_walk_bus); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + lockdep_assert_held(&pci_bus_sem); + + __pci_walk_bus(top, cb, userdata, true); +} +EXPORT_SYMBOL_GPL(pci_walk_bus_locked); + struct pci_bus *pci_bus_get(struct pci_bus *bus) { if (bus) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5368a37154cf9..67956bfebf879 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1258,6 +1258,7 @@ end: /** * pci_set_full_power_state - Put a PCI device into D0 and update its state * @dev: PCI device to power up + * @locked: whether pci_bus_sem is held * * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register * to confirm the state change, restore its BARs if they might be lost and @@ -1267,7 +1268,7 @@ end: * to D0, it is more efficient to use pci_power_up() directly instead of this * function. */ -static int pci_set_full_power_state(struct pci_dev *dev) +static int pci_set_full_power_state(struct pci_dev *dev, bool locked) { u16 pmcsr; int ret; @@ -1303,7 +1304,7 @@ static int pci_set_full_power_state(struct pci_dev *dev) } if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } @@ -1332,10 +1333,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) pci_walk_bus(bus, __pci_dev_set_current_state, &state); } +static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked) +{ + if (!bus) + return; + + if (locked) + pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state); + else + pci_walk_bus(bus, __pci_dev_set_current_state, &state); +} + /** * pci_set_low_power_state - Put a PCI device into a low-power state. * @dev: PCI device to handle. * @state: PCI power state (D1, D2, D3hot) to put the device into. + * @locked: whether pci_bus_sem is held * * Use the device's PCI_PM_CTRL register to put it into a low-power state. * @@ -1346,7 +1359,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) * 0 if device already is in the requested state. * 0 if device's power state has been successfully changed. */ -static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) +static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { u16 pmcsr; @@ -1400,29 +1413,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) pci_power_name(state)); if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } -/** - * pci_set_power_state - Set the power state of a PCI device - * @dev: PCI device to handle. - * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. - * - * Transition a device to a new power state, using the platform firmware and/or - * the device's PCI PM registers. - * - * RETURN VALUE: - * -EINVAL if the requested state is invalid. - * -EIO if device does not support PCI PM or its PM capabilities register has a - * wrong version, or device doesn't support the requested state. - * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. - * 0 if device already is in the requested state. - * 0 if the transition is to D3 but D3 is not supported. - * 0 if device's power state has been successfully changed. - */ -int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { int error; @@ -1446,7 +1442,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; if (state == PCI_D0) - return pci_set_full_power_state(dev); + return pci_set_full_power_state(dev, locked); /* * This device is quirked not to be put into D3, so don't put it in @@ -1460,16 +1456,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) * To put the device in D3cold, put it into D3hot in the native * way, then put it into D3cold using platform ops. */ - error = pci_set_low_power_state(dev, PCI_D3hot); + error = pci_set_low_power_state(dev, PCI_D3hot, locked); if (pci_platform_power_transition(dev, PCI_D3cold)) return error; /* Powering off a bridge may power off the whole hierarchy */ if (dev->current_state == PCI_D3cold) - pci_bus_set_current_state(dev->subordinate, PCI_D3cold); + __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked); } else { - error = pci_set_low_power_state(dev, state); + error = pci_set_low_power_state(dev, state, locked); if (pci_platform_power_transition(dev, state)) return error; @@ -1477,8 +1473,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * Transition a device to a new power state, using the platform firmware and/or + * the device's PCI PM registers. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. + * 0 if device already is in the requested state. + * 0 if the transition is to D3 but D3 is not supported. + * 0 if device's power state has been successfully changed. + */ +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + return __pci_set_power_state(dev, state, false); +} EXPORT_SYMBOL(pci_set_power_state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ + lockdep_assert_held(&pci_bus_sem); + + return __pci_set_power_state(dev, state, true); +} +EXPORT_SYMBOL(pci_set_power_state_locked); + #define PCI_EXP_SAVE_REGS 7 static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9950deeb047a7..88576a22fecb1 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -556,12 +556,12 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); -void pcie_aspm_pm_state_change(struct pci_dev *pdev); +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } -static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } +static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } #endif diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 2a3d973658dac..cf4acea6610d5 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1055,8 +1055,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) up_read(&pci_bus_sem); } -/* @pdev: the root port or switch downstream port */ -void pcie_aspm_pm_state_change(struct pci_dev *pdev) +/* + * @pdev: the root port or switch downstream port + * @locked: whether pci_bus_sem is held + */ +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { struct pcie_link_state *link = pdev->link_state; @@ -1066,12 +1069,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev) * Devices changed PM state, we should recheck if latency * meets all functions' requirement */ - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); pcie_update_aspm_capable(link->root); pcie_config_aspm_path(link); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); } void pcie_aspm_powersave_config_link(struct pci_dev *pdev) diff --git a/include/linux/pci.h b/include/linux/pci.h index f5d89a4b811f1..4da7411da9baf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1383,6 +1383,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1553,6 +1554,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -1884,6 +1887,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, -- GitLab From f2295faba5e8249ae4082791bfc1664c88fff83a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 27 Apr 2024 17:07:18 +0200 Subject: [PATCH 1886/2290] Linux 6.1.88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240423213853.356988651@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Ron Economos Tested-by: Florian Fainelli Tested-by: Yann Sionneau Tested-by: Mateusz Jończyk Tested-by: kernelci.org bot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e46a57006a34f..c73cb678fb9ac 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 87 +SUBLEVEL = 88 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 8a5291736e706cc3df06e2427bb7fa087dbfb0f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2024 12:21:58 +0200 Subject: [PATCH 1887/2290] Revert "ASoC: ti: Convert Pandora ASoC to GPIO descriptors" This reverts commit 0f4048e1a0c6e9d3d31ce5b684600fd137cebfca which is commit 319e6ac143b9e9048e527ab9dd2aabb8fdf3d60f upstream. It breaks the 6.1.y build, so needs to be reverted. Cc: Linus Walleij Cc: Jarkko Nikula Cc: Mark Brown Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/pdata-quirks.c | 10 ----- sound/soc/ti/omap3pandora.c | 63 +++++++++++++++++++----------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 44da1e14a3740..9deba798cc919 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -257,19 +257,9 @@ static struct platform_device pandora_backlight = { .id = -1, }; -static struct gpiod_lookup_table pandora_soc_audio_gpios = { - .dev_id = "soc-audio", - .table = { - GPIO_LOOKUP("gpio-112-127", 6, "dac", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-0-15", 14, "amp", GPIO_ACTIVE_HIGH), - { } - }, -}; - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - gpiod_add_lookup_table(&pandora_soc_audio_gpios); } #endif /* CONFIG_ARCH_OMAP3 */ diff --git a/sound/soc/ti/omap3pandora.c b/sound/soc/ti/omap3pandora.c index fa92ed97dfe3b..a287e9747c2a1 100644 --- a/sound/soc/ti/omap3pandora.c +++ b/sound/soc/ti/omap3pandora.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include @@ -21,11 +21,12 @@ #include "omap-mcbsp.h" +#define OMAP3_PANDORA_DAC_POWER_GPIO 118 +#define OMAP3_PANDORA_AMP_POWER_GPIO 14 + #define PREFIX "ASoC omap3pandora: " static struct regulator *omap3pandora_dac_reg; -static struct gpio_desc *dac_power_gpio; -static struct gpio_desc *amp_power_gpio; static int omap3pandora_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -77,9 +78,9 @@ static int omap3pandora_dac_event(struct snd_soc_dapm_widget *w, return ret; } mdelay(1); - gpiod_set_value(dac_power_gpio, 1); + gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 1); } else { - gpiod_set_value(dac_power_gpio, 0); + gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 0); mdelay(1); regulator_disable(omap3pandora_dac_reg); } @@ -91,9 +92,9 @@ static int omap3pandora_hp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { if (SND_SOC_DAPM_EVENT_ON(event)) - gpiod_set_value(amp_power_gpio, 1); + gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 1); else - gpiod_set_value(amp_power_gpio, 0); + gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 0); return 0; } @@ -228,10 +229,35 @@ static int __init omap3pandora_soc_init(void) pr_info("OMAP3 Pandora SoC init\n"); + ret = gpio_request(OMAP3_PANDORA_DAC_POWER_GPIO, "dac_power"); + if (ret) { + pr_err(PREFIX "Failed to get DAC power GPIO\n"); + return ret; + } + + ret = gpio_direction_output(OMAP3_PANDORA_DAC_POWER_GPIO, 0); + if (ret) { + pr_err(PREFIX "Failed to set DAC power GPIO direction\n"); + goto fail0; + } + + ret = gpio_request(OMAP3_PANDORA_AMP_POWER_GPIO, "amp_power"); + if (ret) { + pr_err(PREFIX "Failed to get amp power GPIO\n"); + goto fail0; + } + + ret = gpio_direction_output(OMAP3_PANDORA_AMP_POWER_GPIO, 0); + if (ret) { + pr_err(PREFIX "Failed to set amp power GPIO direction\n"); + goto fail1; + } + omap3pandora_snd_device = platform_device_alloc("soc-audio", -1); if (omap3pandora_snd_device == NULL) { pr_err(PREFIX "Platform device allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto fail1; } platform_set_drvdata(omap3pandora_snd_device, &snd_soc_card_omap3pandora); @@ -242,20 +268,6 @@ static int __init omap3pandora_soc_init(void) goto fail2; } - dac_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, - "dac", GPIOD_OUT_LOW); - if (IS_ERR(dac_power_gpio)) { - ret = PTR_ERR(dac_power_gpio); - goto fail3; - } - - amp_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, - "amp", GPIOD_OUT_LOW); - if (IS_ERR(amp_power_gpio)) { - ret = PTR_ERR(amp_power_gpio); - goto fail3; - } - omap3pandora_dac_reg = regulator_get(&omap3pandora_snd_device->dev, "vcc"); if (IS_ERR(omap3pandora_dac_reg)) { pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", @@ -271,7 +283,10 @@ fail3: platform_device_del(omap3pandora_snd_device); fail2: platform_device_put(omap3pandora_snd_device); - +fail1: + gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); +fail0: + gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); return ret; } module_init(omap3pandora_soc_init); @@ -280,6 +295,8 @@ static void __exit omap3pandora_soc_exit(void) { regulator_put(omap3pandora_dac_reg); platform_device_unregister(omap3pandora_snd_device); + gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); + gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); } module_exit(omap3pandora_soc_exit); -- GitLab From dcbc050cb0d304c3427d6583384eebcaf0e3caee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2024 12:32:48 +0200 Subject: [PATCH 1888/2290] Linux 6.1.89 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c73cb678fb9ac..a0472e1cf7156 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 88 +SUBLEVEL = 89 EXTRAVERSION = NAME = Curry Ramen -- GitLab From e24e1651908bbabb38dbd89d964ae97da700701a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Apr 2024 12:05:07 -0300 Subject: [PATCH 1889/2290] smb: client: fix rename(2) regression against samba [ Upstream commit 18d86965e31f9be4d477da0744a7cdc9815858de ] After commit 2c7d399e551c ("smb: client: reuse file lease key in compound operations") the client started reusing lease keys for rename, unlink and set path size operations to prevent it from breaking its own leases and thus causing unnecessary lease breaks to same connection. The implementation relies on positive dentries and cifsInodeInfo::lease_granted to decide whether reusing lease keys for the compound requests. cifsInodeInfo::lease_granted was introduced by commit 0ab95c2510b6 ("Defer close only when lease is enabled.") to indicate whether lease caching is granted for a specific file, but that can only happen until file is open, so cifsInodeInfo::lease_granted was left uninitialised in ->alloc_inode and then client started sending random lease keys for files that hadn't any leases. This fixes the following test case against samba: mount.cifs //srv/share /mnt/1 -o ...,nosharesock mount.cifs //srv/share /mnt/2 -o ...,nosharesock touch /mnt/1/foo; tail -f /mnt/1/foo & pid=$! mv /mnt/2/foo /mnt/2/bar # fails with -EIO kill $pid Fixes: 0ab95c2510b6 ("Defer close only when lease is enabled.") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 0a79771c8f33b..f0a3336ffb6c8 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -387,6 +387,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. */ cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->lease_granted = false; cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; -- GitLab From 4f83ca4c7aa60447565108cd09a400bb3b4898c4 Mon Sep 17 00:00:00 2001 From: Takayuki Nagata Date: Mon, 15 Apr 2024 16:47:49 +0900 Subject: [PATCH 1890/2290] cifs: reinstate original behavior again for forceuid/forcegid [ Upstream commit 77d8aa79ecfb209308e0644c02f655122b31def7 ] forceuid/forcegid should be enabled by default when uid=/gid= options are specified, but commit 24e0a1eff9e2 ("cifs: switch to new mount api") changed the behavior. Due to the change, a mounted share does not show intentional uid/gid for files and directories even though uid=/gid= options are specified since forceuid/forcegid are not enabled. This patch reinstates original behavior that overrides uid/gid with specified uid/gid by the options. Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Signed-off-by: Takayuki Nagata Acked-by: Paulo Alcantara (Red Hat) Acked-by: Ronnie Sahlberg Acked-by: Tom Talpey Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fs_context.c | 12 ++++++++++++ fs/smb/client/fs_context.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 4d5302b58b534..ca39d01077cdf 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -676,6 +676,16 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* set the port that we got earlier */ cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + if (ctx->uid_specified && !ctx->forceuid_specified) { + ctx->override_uid = 1; + pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n"); + } + + if (ctx->gid_specified && !ctx->forcegid_specified) { + ctx->override_gid = 1; + pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n"); + } + if (ctx->override_uid && !ctx->uid_specified) { ctx->override_uid = 0; pr_notice("ignoring forceuid mount option specified with no uid= option\n"); @@ -923,12 +933,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->override_uid = 0; else ctx->override_uid = 1; + ctx->forceuid_specified = true; break; case Opt_forcegid: if (result.negated) ctx->override_gid = 0; else ctx->override_gid = 1; + ctx->forcegid_specified = true; break; case Opt_perm: if (result.negated) diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 26093f54d3e65..319a91b7f6700 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -154,6 +154,8 @@ enum cifs_param { }; struct smb3_fs_context { + bool forceuid_specified; + bool forcegid_specified; bool uid_specified; bool cruid_specified; bool gid_specified; -- GitLab From 6b0ac25f367fb177f557cf684357e2233138fb68 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 6 Mar 2024 00:44:04 +0000 Subject: [PATCH 1891/2290] HID: intel-ish-hid: ipc: Fix dev_err usage with uninitialized dev->devc [ Upstream commit 92826905ae340b7f2b25759a06c8c60bfc476b9f ] The variable dev->devc in ish_dev_init was utilized by dev_err before it was properly assigned. To rectify this, the assignment of dev->devc has been moved to immediately follow memory allocation. Without this change "(NULL device *)" is printed for device information. Fixes: 8ae2f2b0a284 ("HID: intel-ish-hid: ipc: Fix potential use-after-free in work function") Fixes: ae02e5d40d5f ("HID: intel-ish-hid: ipc layer") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ipc/ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index a49c6affd7c4c..dd5fc60874ba1 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -948,6 +948,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) if (!dev) return NULL; + dev->devc = &pdev->dev; ishtp_device_init(dev); init_waitqueue_head(&dev->wait_hw_ready); @@ -983,7 +984,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) } dev->ops = &ish_hw_ops; - dev->devc = &pdev->dev; dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr); return dev; } -- GitLab From 526facda6194c481cb10c80523cae50b7dbf018e Mon Sep 17 00:00:00 2001 From: Yaraslau Furman Date: Wed, 3 Apr 2024 19:54:24 +0300 Subject: [PATCH 1892/2290] HID: logitech-dj: allow mice to use all types of reports [ Upstream commit 21f28a7eb78dea6c59be6b0a5e0b47bf3d25fcbb ] You can bind whatever action you want to the mouse's reprogrammable buttons using Windows application. Allow Linux to receive multimedia keycodes. Fixes: 3ed224e273ac ("HID: logitech-dj: Fix 064d:c52f receiver support") Signed-off-by: Yaraslau Furman Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-logitech-dj.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 08768e5accedc..57697605b2e24 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -965,9 +965,7 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, } break; case REPORT_TYPE_MOUSE: - workitem->reports_supported |= STD_MOUSE | HIDPP; - if (djrcv_dev->type == recvr_type_mouse_only) - workitem->reports_supported |= MULTIMEDIA; + workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA; break; } } -- GitLab From 9ab1d84bdb12a52387810f1554ba66c0f0cbd25d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 15:24:35 +0300 Subject: [PATCH 1893/2290] arm64: dts: rockchip: set PHY address of MT7531 switch to 0x1f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a2ac2a1b02590a22a236c43c455f421cdede45f5 ] The MT7531 switch listens on PHY address 0x1f on an MDIO bus. I've got two findings that support this. There's no bootstrapping option to change the PHY address of the switch. The Linux driver hardcodes 0x1f as the PHY address of the switch. So the reg property on the device tree is currently ignored by the Linux driver. Therefore, describe the correct PHY address on Banana Pi BPI-R2 Pro that has this switch. Signed-off-by: Arınç ÜNAL Fixes: c1804463e5c6 ("arm64: dts: rockchip: Add mt7531 dsa node to BPI-R2-Pro board") Link: https://lore.kernel.org/r/20240314-for-rockchip-mt7531-phy-address-v1-1-743b5873358f@arinc9.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 26d7fda275edb..7952a14314360 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -521,9 +521,9 @@ #address-cells = <1>; #size-cells = <0>; - switch@0 { + switch@1f { compatible = "mediatek,mt7531"; - reg = <0>; + reg = <0x1f>; ports { #address-cells = <1>; -- GitLab From cb5b05e61968247a9ef0c71b306c888d3b2993e9 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:07 +0100 Subject: [PATCH 1894/2290] arm64: dts: rockchip: enable internal pull-up on Q7_USB_ID for RK3399 Puma [ Upstream commit e6b1168f37e3f86d9966276c5a3fff9eb0df3e5f ] The Q7_USB_ID has a diode used as a level-shifter, and is used as an input pin. The SoC default for this pin is a pull-up, which is correct but the pinconf in the introducing commit missed that, so let's fix this oversight. Fixes: ed2c66a95c0c ("arm64: dts: rockchip: fix rk3399-puma-haikou USB OTG mode") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-1-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index aa3e21bd6c8f4..fee2cc035613c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -443,7 +443,7 @@ usb3 { usb3_id: usb3-id { rockchip,pins = - <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; -- GitLab From aa1af71deedaf08fd400aed2f38a8c57557ea334 Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:43 +0100 Subject: [PATCH 1895/2290] arm64: dts: rockchip: fix alphabetical ordering RK3399 puma [ Upstream commit f0abb4b2c7acf3c3e4130dc3f54cd90cf2ae62bc ] Nodes overridden by their reference should be ordered alphabetically to make it easier to read the DTS. pinctrl node is defined in the wrong location so let's reorder it. Signed-off-by: Iskander Amara Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308085243.69903-2-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner Stable-dep-of: 945a7c857091 ("arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index fee2cc035613c..a060419bca901 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -401,15 +401,6 @@ gpio1830-supply = <&vcc_1v8>; }; -&pmu_io_domains { - status = "okay"; - pmu1830-supply = <&vcc_1v8>; -}; - -&pwm2 { - status = "okay"; -}; - &pinctrl { i2c8 { i2c8_xfer_a: i2c8-xfer { @@ -448,6 +439,15 @@ }; }; +&pmu_io_domains { + status = "okay"; + pmu1830-supply = <&vcc_1v8>; +}; + +&pwm2 { + status = "okay"; +}; + &sdhci { /* * Signal integrity isn't great at 200MHz but 100MHz has proven stable -- GitLab From 076ff06a1e3a9d9b4f1ab32921e2af73357c045a Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:08 +0100 Subject: [PATCH 1896/2290] arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma [ Upstream commit 945a7c8570916650a415757d15d83e0fa856a686 ] The PCIE_WAKE# has a diode used as a level-shifter, and is used as an input pin. While the SoC default is to enable the pull-up, the core rk3399 pinconf for this pin opted for pull-none. So as to not disturb the behaviour of other boards which may rely on pull-none instead of pull-up, set the needed pull-up only for RK3399 Puma. Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-2-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index a060419bca901..a77f922107c20 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -401,6 +401,11 @@ gpio1830-supply = <&vcc_1v8>; }; +&pcie_clkreqn_cpm { + rockchip,pins = + <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; +}; + &pinctrl { i2c8 { i2c8_xfer_a: i2c8-xfer { -- GitLab From 475816446f6037a10f50ae79af737783e0bc9a8c Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 1 Apr 2024 00:20:56 +0200 Subject: [PATCH 1897/2290] arm64: dts: rockchip: Remove unsupported node from the Pinebook Pro dts [ Upstream commit 43853e843aa6c3d47ff2b0cce898318839483d05 ] Remove a redundant node from the Pine64 Pinebook Pro dts, which is intended to provide a value for the delay in PCI Express enumeration, but that isn't supported without additional out-of-tree kernel patches. There were already efforts to upstream those kernel patches, because they reportedly make some PCI Express cards (such as LSI SAS HBAs) usable in Pine64 RockPro64 (which is also based on the RK3399); otherwise, those PCI Express cards fail to enumerate. However, providing the required background and explanations proved to be a tough nut to crack, which is the reason why those patches remain outside of the kernel mainline for now. If those out-of-tree patches eventually become upstreamed, the resulting device-tree changes will almost surely belong to the RK3399 SoC dtsi. Also, the above-mentioned unusable-without-out-of-tree-patches PCI Express devices are in all fairness not usable in a Pinebook Pro without some extensive hardware modifications, which is another reason to delete this redundant node. When it comes to the Pinebook Pro, only M.2 NVMe SSDs can be installed out of the box (using an additional passive adapter PCB sold separately by Pine64), which reportedly works fine with no additional patches. Fixes: 5a65505a6988 ("arm64: dts: rockchip: Add initial support for Pinebook Pro") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/0f82c3f97cb798d012270d13b34d8d15305ef293.1711923520.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 194e48c755f6b..a51e8d0493cab 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -789,7 +789,6 @@ }; &pcie0 { - bus-scan-delay-ms = <1000>; ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; -- GitLab From 2f83d4763ac969b40c19adec23fbe610ec5a751d Mon Sep 17 00:00:00 2001 From: Ikjoon Jang Date: Fri, 23 Feb 2024 17:11:21 +0800 Subject: [PATCH 1898/2290] arm64: dts: mediatek: mt8183: Add power-domains properity to mfgcfg [ Upstream commit 1781f2c461804c0123f59afc7350e520a88edffb ] mfgcfg clock is under MFG_ASYNC power domain. Fixes: e526c9bc11f8 ("arm64: dts: Add Mediatek SoC MT8183 and evaluation board dts and Makefile") Fixes: 37fb78b9aeb7 ("arm64: dts: mediatek: Add mt8183 power domains controller") Signed-off-by: Weiyi Lu Signed-off-by: Ikjoon Jang Reviewed-by: Enric Balletbo i Serra Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240223091122.2430037-1-wenst@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index d5d9b954c449a..2147e152683bf 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1554,6 +1554,7 @@ compatible = "mediatek,mt8183-mfgcfg", "syscon"; reg = <0 0x13000000 0 0x1000>; #clock-cells = <1>; + power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>; }; gpu: gpu@13040000 { -- GitLab From 18548e2ab95449e1a34e20e658c2457f433aa0e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:28 -0500 Subject: [PATCH 1899/2290] arm64: dts: mediatek: mt8192: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 00bcc8810d9dd69d3899a4189e2f3964f263a600 ] Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b4b75bac952b ("arm64: dts: mt8192: Add display nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-1-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 4ed8a0f187583..7ecba8c7262da 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1240,6 +1240,7 @@ reg = <0 0x14001000 0 0x1000>; interrupts = ; clocks = <&mmsys CLK_MM_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x1000 0x1000>; mediatek,gce-events = , ; power-domains = <&spm MT8192_POWER_DOMAIN_DISP>; -- GitLab From 5bcfc5337334f44518b1d24b0d0bd155558d15a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:29 -0500 Subject: [PATCH 1900/2290] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to vpp/vdosys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 96b0c1528ef41fe754f5d1378b1db6c098a2e33f ] Add the missing mediatek,gce-client-reg property to the vppsys and vdosys nodes to allow them to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: 6aa5b46d1755 ("arm64: dts: mt8195: Add vdosys and vppsys clock nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-2-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 414cbe3451270..8f33b3226435a 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -1492,6 +1492,7 @@ compatible = "mediatek,mt8195-vppsys0"; reg = <0 0x14000000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0 0x1000>; }; smi_sub_common_vpp0_vpp1_2x1: smi@14010000 { @@ -1597,6 +1598,7 @@ compatible = "mediatek,mt8195-vppsys1"; reg = <0 0x14f00000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0 0x1000>; }; larb5: larb@14f02000 { @@ -1982,6 +1984,7 @@ reg = <0 0x1c01a000 0 0x1000>; mboxes = <&gce0 0 CMDQ_THR_PRIO_4>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0xa000 0x1000>; }; larb20: larb@1b010000 { -- GitLab From e8ac4490db15ca97349f64d0241541cb98ef4870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:30 -0500 Subject: [PATCH 1901/2290] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3b129949184a1251e6a42db714f6d68b75fabedd ] Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b852ee68fd72 ("arm64: dts: mt8195: Add display node for vdosys0") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-3-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 8f33b3226435a..bdf002e9cece1 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2088,6 +2088,7 @@ interrupts = ; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>; clocks = <&vdosys0 CLK_VDO0_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0x6000 0x1000>; mediatek,gce-events = ; }; -- GitLab From 942debbea563c4ccfeeeed21a304c9b84b84090d Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:02 +0800 Subject: [PATCH 1902/2290] arm64: dts: mediatek: mt8192-asurada: Update min voltage constraint for MT6315 [ Upstream commit 374a7c6400e314458178255a63c37d6347845092 ] Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 606250 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 3183cb62b033 ("arm64: dts: mediatek: asurada: Add SPMI regulators") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-2-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index c6080af1e4a30..0814ed6a7272d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -903,7 +903,7 @@ mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -913,7 +913,7 @@ mt6315_6_vbuck3: vbuck3 { regulator-compatible = "vbuck3"; regulator-name = "Vlcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -930,7 +930,7 @@ mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <606250>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <800000>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; -- GitLab From 818f56a8b32fd734c5ab36bdc0a8985e939060f8 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:03 +0800 Subject: [PATCH 1903/2290] arm64: dts: mediatek: mt8195-cherry: Update min voltage constraint for MT6315 [ Upstream commit e9a6b8b5c61350535c7eb5ea9b2dde0d5745bd1b ] Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 625000 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 260c04d425eb ("arm64: dts: mediatek: cherry: Enable MT6315 regulators on SPMI bus") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-3-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index 4b8a1c462906e..9180a73db066e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -845,7 +845,7 @@ mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; @@ -863,7 +863,7 @@ mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <625000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; -- GitLab From ce782b5a748531abd91bfc82333c872446298124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:47 +0100 Subject: [PATCH 1904/2290] arm64: dts: mediatek: mt7622: fix clock controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3ba5a61594347ab46e7c2cff6cd63ea0f1282efb ] 1. Drop unneeded "syscon"s (bindings were updated recently) 2. Use "clock-controller" in nodenames 3. Add missing "#clock-cells" Fixes: d7167881e03e ("arm64: dts: mt7622: add clock controller device nodes") Fixes: e9b65ecb7c30 ("arm64: dts: mediatek: mt7622: introduce nodes for Wireless Ethernet Dispatch") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-2-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 7bb316922a3a9..c1747483350ef 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -282,16 +282,14 @@ }; }; - apmixedsys: apmixedsys@10209000 { - compatible = "mediatek,mt7622-apmixedsys", - "syscon"; + apmixedsys: clock-controller@10209000 { + compatible = "mediatek,mt7622-apmixedsys"; reg = <0 0x10209000 0 0x1000>; #clock-cells = <1>; }; - topckgen: topckgen@10210000 { - compatible = "mediatek,mt7622-topckgen", - "syscon"; + topckgen: clock-controller@10210000 { + compatible = "mediatek,mt7622-topckgen"; reg = <0 0x10210000 0 0x1000>; #clock-cells = <1>; }; @@ -734,9 +732,8 @@ power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>; }; - ssusbsys: ssusbsys@1a000000 { - compatible = "mediatek,mt7622-ssusbsys", - "syscon"; + ssusbsys: clock-controller@1a000000 { + compatible = "mediatek,mt7622-ssusbsys"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -793,9 +790,8 @@ }; }; - pciesys: pciesys@1a100800 { - compatible = "mediatek,mt7622-pciesys", - "syscon"; + pciesys: clock-controller@1a100800 { + compatible = "mediatek,mt7622-pciesys"; reg = <0 0x1a100800 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -921,12 +917,13 @@ }; }; - hifsys: syscon@1af00000 { - compatible = "mediatek,mt7622-hifsys", "syscon"; + hifsys: clock-controller@1af00000 { + compatible = "mediatek,mt7622-hifsys"; reg = <0 0x1af00000 0 0x70>; + #clock-cells = <1>; }; - ethsys: syscon@1b000000 { + ethsys: clock-controller@1b000000 { compatible = "mediatek,mt7622-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; -- GitLab From da3c0740f0aa774c84ac985ed180f76771d0df15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:48 +0100 Subject: [PATCH 1905/2290] arm64: dts: mediatek: mt7622: fix IR nodename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 800dc93c3941e372c94278bf4059e6e82f60bd66 ] Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: cir@10009000: $nodename:0: 'cir@10009000' does not match '^ir(-receiver)?(@[a-f0-9]+)?$' from schema $id: http://devicetree.org/schemas/media/mediatek,mt7622-cir.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-3-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index c1747483350ef..87f692a041a24 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -251,7 +251,7 @@ clock-names = "hif_sel"; }; - cir: cir@10009000 { + cir: ir-receiver@10009000 { compatible = "mediatek,mt7622-cir"; reg = <0 0x10009000 0 0x1000>; interrupts = ; -- GitLab From d078de867493c5ad05a3189c317f727299d9d69f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:49 +0100 Subject: [PATCH 1906/2290] arm64: dts: mediatek: mt7622: fix ethernet controller "compatible" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 208add29ce5b7291f6c466e4dfd9cbf61c72888e ] Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: ethernet@1b100000: compatible: ['mediatek,mt7622-eth', 'mediatek,mt2701-eth', 'syscon'] is too long from schema $id: http://devicetree.org/schemas/net/mediatek,net.yaml# (and other complains about wrong clocks). Fixes: 5f599b3a0bb8 ("arm64: dts: mt7622: add ethernet device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-4-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 87f692a041a24..5b7be71afa5c1 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -963,9 +963,7 @@ }; eth: ethernet@1b100000 { - compatible = "mediatek,mt7622-eth", - "mediatek,mt2701-eth", - "syscon"; + compatible = "mediatek,mt7622-eth"; reg = <0 0x1b100000 0 0x20000>; interrupts = , , -- GitLab From 1aea205a4226c8ef60bddb805cbf9a347b1111f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:50 +0100 Subject: [PATCH 1907/2290] arm64: dts: mediatek: mt7622: drop "reset-names" from thermal block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ecb5b0034f5bcc35003b4b965cf50c6e98316e79 ] Binding doesn't specify "reset-names" property and Linux driver also doesn't use it. Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: thermal@1100b000: Unevaluated properties are not allowed ('reset-names' was unexpected) from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-5-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 5b7be71afa5c1..f8a32006885bb 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -512,7 +512,6 @@ <&pericfg CLK_PERI_AUXADC_PD>; clock-names = "therm", "auxadc"; resets = <&pericfg MT7622_PERI_THERM_SW_RST>; - reset-names = "therm"; mediatek,auxadc = <&auxadc>; mediatek,apmixedsys = <&apmixedsys>; nvmem-cells = <&thermal_calibration>; -- GitLab From af45b5bc30f0dc2caf0a3d6a7e40f8da288d0fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 1 Mar 2024 08:47:41 +0100 Subject: [PATCH 1908/2290] arm64: dts: mediatek: mt2712: fix validation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3baac7291effb501c4d52df7019ebf52011e5772 ] 1. Fixup infracfg clock controller binding It also acts as reset controller so #reset-cells is required. 2. Use -pins suffix for pinctrl This fixes: arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: syscon@10001000: '#reset-cells' is a required property from schema $id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml# arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: pinctrl@1000b000: 'eth_default', 'eth_sleep', 'usb0_iddig', 'usb1_iddig' do not match any of the regexes: 'pinctrl-[0-9]+', 'pins$' from schema $id: http://devicetree.org/schemas/pinctrl/mediatek,mt65xx-pinctrl.yaml# Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240301074741.8362-1-zajec5@gmail.com [Angelo: Added Fixes tags] Fixes: 5d4839709c8e ("arm64: dts: mt2712: Add clock controller device nodes") Fixes: 1724f4cc5133 ("arm64: dts: Add USB3 related nodes for MT2712") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt2712-evb.dts | 8 ++++---- arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts index d31a194124c91..03fd9df16999e 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts @@ -128,7 +128,7 @@ }; &pio { - eth_default: eth_default { + eth_default: eth-default-pins { tx_pins { pinmux = , , @@ -155,7 +155,7 @@ }; }; - eth_sleep: eth_sleep { + eth_sleep: eth-sleep-pins { tx_pins { pinmux = , , @@ -181,14 +181,14 @@ }; }; - usb0_id_pins_float: usb0_iddig { + usb0_id_pins_float: usb0-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; }; }; - usb1_id_pins_float: usb1_iddig { + usb1_id_pins_float: usb1-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 1ac0b2cf3d406..fde2b165f55d2 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -249,10 +249,11 @@ #clock-cells = <1>; }; - infracfg: syscon@10001000 { + infracfg: clock-controller@10001000 { compatible = "mediatek,mt2712-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; pericfg: syscon@10003000 { -- GitLab From 0277e73e8ea50e04888ad1b455ea34e3aee773c2 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Tue, 5 Mar 2024 15:32:18 +0100 Subject: [PATCH 1909/2290] arm64: dts: rockchip: regulator for sd needs to be always on for BPI-R2Pro [ Upstream commit 433d54818f64a2fe0562f8c04c7a81f562368515 ] With default dts configuration for BPI-R2Pro, the regulator for sd card is powered off when reboot is commanded, and the only solution to detect the sd card again, and therefore, allow rebooting from there, is to do a hardware reset. Configure the regulator for sd to be always on for BPI-R2Pro in order to avoid this issue. Fixes: f901aaadaa2a ("arm64: dts: rockchip: Add Bananapi R2 Pro") Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20240305143222.189413-1-jtornosm@redhat.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 7952a14314360..856fe4b66a0b9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -412,6 +412,8 @@ vccio_sd: LDO_REG5 { regulator-name = "vccio_sd"; + regulator-always-on; + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; -- GitLab From bab058e31a92f489557e3a00e16d34853ed38ab4 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 29 Mar 2024 10:36:50 +0000 Subject: [PATCH 1910/2290] ARC: [plat-hsdk]: Remove misplaced interrupt-cells property [ Upstream commit 61231eb8113ce47991f35024f9c20810b37996bf ] "gmac" node stands for just an ordinary Ethernet controller, which is by no means a provider of interrupts, i.e. it doesn't serve as an interrupt controller, thus "#interrupt-cells" property doesn't belong to it and so we remove it. Fixes: ------------>8------------ DTC arch/arc/boot/dts/hsdk.dtb arch/arc/boot/dts/hsdk.dts:207.23-235.5: Warning (interrupt_provider): /soc/ethernet@8000: '#interrupt-cells' found, but node is not an interrupt provider arch/arc/boot/dts/hsdk.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' ------------>8------------ Reported-by: Vineet Gupta Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/boot/dts/hsdk.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 6691f42550778..41b980df862b1 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -205,7 +205,6 @@ }; gmac: ethernet@8000 { - #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; interrupts = <10>; -- GitLab From d20e3beb83da0f3a09239a833f85bccecfe30052 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 15 Apr 2024 11:54:43 +0300 Subject: [PATCH 1911/2290] wifi: iwlwifi: mvm: remove old PASN station when adding a new one [ Upstream commit dbfff5bf9292714f02ace002fea8ce6599ea1145 ] If a PASN station is added, and an old PASN station already exists for the same mac address, remove the old station before adding the new one. Keeping the old station caueses old security context to be used in measurements. Fixes: 0739a7d70e00 ("iwlwifi: mvm: initiator: add option for adding a PASN responder") Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://msgid.link/20240415114847.ef3544a416f2.I4e8c7c8ca22737f4f908ae5cd4fc0b920c703dd3@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index 8c5b97fb19414..5b0b4bb2bb684 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -48,6 +48,8 @@ int iwl_mvm_ftm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (!pasn) return -ENOBUFS; + iwl_mvm_ftm_remove_pasn_sta(mvm, addr); + pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher); switch (pasn->cipher) { -- GitLab From 9b9c4adad6d132e1d591d21d6b618a18aa25da80 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Apr 2024 11:54:44 +0300 Subject: [PATCH 1912/2290] wifi: iwlwifi: mvm: return uid from iwl_mvm_build_scan_cmd [ Upstream commit bada85a3f584763deadd201147778c3e791d279c ] This function is supposed to return a uid on success, and an errno in failure. But it currently returns the return value of the specific cmd version handler, which in turn returns 0 on success and errno otherwise. This means that on success, iwl_mvm_build_scan_cmd will return 0 regardless if the actual uid. Fix this by returning the uid if the handler succeeded. Fixes: 687db6ff5b70 ("iwlwifi: scan: make new scan req versioning flow") Signed-off-by: Miri Korenblit Reviewed-by: Ilan Peer Link: https://msgid.link/20240415114847.5e2d602b3190.I4c4931021be74a67a869384c8f8ee7463e0c7857@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index acd8803dbcdd6..b20d64dbba1ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2650,7 +2650,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm, if (ver_handler->version != scan_ver) continue; - return ver_handler->handler(mvm, vif, params, type, uid); + err = ver_handler->handler(mvm, vif, params, type, uid); + return err ? : uid; } err = iwl_mvm_scan_umac(mvm, vif, params, type, uid); -- GitLab From 9064163f1cf3cb90150ec1a94bfb349fe58630b1 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Thu, 18 Apr 2024 15:29:08 +0200 Subject: [PATCH 1913/2290] vxlan: drop packets from invalid src-address [ Upstream commit f58f45c1e5b92975e91754f5407250085a6ae7cf ] The VXLAN driver currently does not check if the inner layer2 source-address is valid. In case source-address snooping/learning is enabled, a entry in the FDB for the invalid address is created with the layer3 address of the tunnel endpoint. If the frame happens to have a non-unicast address set, all this non-unicast traffic is subsequently not flooded to the tunnel network but sent to the learnt host in the FDB. To make matters worse, this FDB entry does not expire. Apply the same filtering for packets as it is done for bridges. This not only drops these invalid packets but avoids them from being learnt into the FDB. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Suggested-by: Ido Schimmel Signed-off-by: David Bauer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 619dd71c9d75e..fbd36dff9ec27 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1662,6 +1662,10 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) return false; + /* Ignore packets from invalid src-address */ + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + return false; + /* Get address from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; -- GitLab From 3f7ecad54c01d1c0f27d546f968de69fa0da1efa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:06 +0200 Subject: [PATCH 1914/2290] mlxsw: core: Unregister EMAD trap using FORWARD action [ Upstream commit 976c44af48141cd8595601c0af2a19a43c5b228b ] The device's manual (PRM - Programmer's Reference Manual) classifies the trap that is used to deliver EMAD responses as an "event trap". Among other things, it means that the only actions that can be associated with the trap are TRAP and FORWARD (NOP). Currently, during driver de-initialization the driver unregisters the trap by setting its action to DISCARD, which violates the above guideline. Future firmware versions will prevent such misuses by returning an error. This does not prevent the driver from working, but an error will be printed to the kernel log during module removal / devlink reload: mlxsw_spectrum 0000:03:00.0: Reg cmd access status failed (status=7(bad parameter)) mlxsw_spectrum 0000:03:00.0: Reg cmd access failed (reg_id=7003(hpkt),type=write) Suppress the error message by aligning the driver to the manual and use a FORWARD (NOP) action when unregistering the trap. Fixes: 4ec14b7634b2 ("mlxsw: Add interface to access registers and process events") Cc: Jiri Pirko Cc: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/753a89e14008fde08cb4a2c1e5f537b81d8eb2d6.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e2a985ec2c765..f36a416ffcfe9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -792,7 +792,7 @@ free_skb: static const struct mlxsw_listener mlxsw_emad_rx_listener = MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false, - EMAD, DISCARD); + EMAD, FORWARD); static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { -- GitLab From d3c4b14c8a99dc41429db4b56b1dc104600cc9d8 Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Thu, 4 Apr 2024 15:38:23 +0300 Subject: [PATCH 1915/2290] ARM: dts: microchip: at91-sama7g5ek: Replace regulator-suspend-voltage with the valid property [ Upstream commit e027b71762e84ee9d4ba9ad5401b956b9e83ed2a ] By checking the pmic node with microchip,mcp16502.yaml# 'regulator-suspend-voltage' does not match any of the regexes 'pinctrl-[0-9]+' from schema microchip,mcp16502.yaml# which inherits regulator.yaml#. So replace regulator-suspend-voltage with regulator-suspend-microvolt to avoid the inconsitency. Fixes: 85b1304b9daa ("ARM: dts: at91: sama7g5ek: set regulator voltages for standby state") Signed-off-by: Andrei Simion Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20240404123824.19182-2-andrei.simion@microchip.com [claudiu.beznea: added a dot before starting the last sentence in commit description] Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sama7g5ek.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts index 4af8a1c96ed63..bede6e88ae110 100644 --- a/arch/arm/boot/dts/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/at91-sama7g5ek.dts @@ -293,7 +293,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1150000>; + regulator-suspend-microvolt = <1150000>; regulator-mode = <4>; }; @@ -314,7 +314,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1050000>; + regulator-suspend-microvolt = <1050000>; regulator-mode = <4>; }; @@ -331,7 +331,7 @@ regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; @@ -346,7 +346,7 @@ regulator-max-microvolt = <3700000>; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; -- GitLab From 599c9ad5e1d43f5c12d869f5fd406ba5d8c55270 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 20 Apr 2024 07:01:16 +0000 Subject: [PATCH 1916/2290] icmp: prevent possible NULL dereferences from icmp_build_probe() [ Upstream commit c58e88d49097bd12dfcfef4f075b43f5d5830941 ] First problem is a double call to __in_dev_get_rcu(), because the second one could return NULL. if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) Second problem is a read from dev->ip6_ptr with no NULL check: if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) Use the correct RCU API to fix these. v2: add missing include Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Signed-off-by: Eric Dumazet Cc: Andreas Roeseler Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2b09ef70752f9..31051b327e53c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -1029,6 +1030,8 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; struct net *net = dev_net(skb->dev); + struct inet6_dev *in6_dev; + struct in_device *in_dev; struct net_device *dev; char buff[IFNAMSIZ]; u16 ident_len; @@ -1112,10 +1115,15 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) /* Fill bits in reply message */ if (dev->flags & IFF_UP) status |= ICMP_EXT_ECHOREPLY_ACTIVE; - if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + + in_dev = __in_dev_get_rcu(dev); + if (in_dev && rcu_access_pointer(in_dev->ifa_list)) status |= ICMP_EXT_ECHOREPLY_IPV4; - if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + + in6_dev = __in6_dev_get(dev); + if (in6_dev && !list_empty(&in6_dev->addr_list)) status |= ICMP_EXT_ECHOREPLY_IPV6; + dev_put(dev); icmphdr->un.echo.sequence |= htons(status); return true; -- GitLab From 16be600293ca4a494fa7b8f70affc18c1113f405 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2024 16:02:00 +0800 Subject: [PATCH 1917/2290] bridge/br_netlink.c: no need to return void function [ Upstream commit 4fd1edcdf13c0d234543ecf502092be65c5177db ] br_info_notify is a void function. There is no need to return. Fixes: b6d0425b816e ("bridge: cfm: Netlink Notifications.") Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d087fd4c784ac..d38eff27767dc 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -650,7 +650,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br, { u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; - return br_info_notify(event, br, port, filter); + br_info_notify(event, br, port, filter); } /* -- GitLab From b20beb0598ed6abfcabb0086f0961fc52dd0d8a9 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:47 -0700 Subject: [PATCH 1918/2290] bnxt_en: refactor reset close code [ Upstream commit 7474b1c82be3780692d537d331f9aa7fc1e5a368 ] Introduce bnxt_fw_fatal_close() API which can be used to stop data path and disable device when firmware is in fatal state. Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller Stable-dep-of: a1acdc226bae ("bnxt_en: Fix the PCI-AER routines") Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0d0aad7141c15..e889017e3a7fb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11812,6 +11812,16 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) bnxt_rtnl_unlock_sp(bp); } +static void bnxt_fw_fatal_close(struct bnxt *bp) +{ + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(bp->pdev); +} + static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); @@ -11825,12 +11835,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val); if (val == 0xffff) bp->fw_reset_min_dsecs = 0; - bnxt_tx_disable(bp); - bnxt_disable_napi(bp); - bnxt_disable_int_sync(bp); - bnxt_free_irq(bp); - bnxt_clear_int_mode(bp); - pci_disable_device(bp->pdev); + bnxt_fw_fatal_close(bp); } __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); -- GitLab From 25a82005d5686297e1bbc225c63674c3abe9c1cd Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:48 -0700 Subject: [PATCH 1919/2290] bnxt_en: Fix the PCI-AER routines [ Upstream commit a1acdc226baec331512f815d6ac9dd6f8435cc7f ] We do not support two simultaneous recoveries so check for reset flag, BNXT_STATE_IN_FW_RESET, and do not proceed with AER further. When the pci channel state is pci_channel_io_frozen, the PCIe link can not be trusted so we disable the traffic immediately and stop BAR access by calling bnxt_fw_fatal_close(). BAR access after AER fatal error can cause an NMI. Fixes: f75d9a0aa967 ("bnxt_en: Re-write PCI BARs after PCI fatal error.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e889017e3a7fb..70021b5eb54a6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13983,6 +13983,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); + bool abort = false; netdev_info(netdev, "PCI I/O error detected\n"); @@ -13991,16 +13992,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, bnxt_ulp_stop(bp); - if (state == pci_channel_io_perm_failure) { + if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + netdev_err(bp->dev, "Firmware reset already in progress\n"); + abort = true; + } + + if (abort || state == pci_channel_io_perm_failure) { rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; } - if (state == pci_channel_io_frozen) + /* Link is not reliable anymore if state is pci_channel_io_frozen + * so we disable bus master to prevent any potential bad DMAs before + * freeing kernel memory. + */ + if (state == pci_channel_io_frozen) { set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state); + bnxt_fw_fatal_close(bp); + } if (netif_running(netdev)) - bnxt_close(netdev); + __bnxt_close_nic(bp, true, true); if (pci_is_enabled(pdev)) pci_disable_device(pdev); @@ -14086,6 +14098,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) } reset_exit: + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_clear_reservations(bp, true); rtnl_unlock(); -- GitLab From 424c69dbb2acd7cc09285ba4bd46cbdb5d97bd75 Mon Sep 17 00:00:00 2001 From: Paul Geurts Date: Thu, 18 Apr 2024 21:25:38 +0200 Subject: [PATCH 1920/2290] NFC: trf7970a: disable all regulators on removal [ Upstream commit 6bea4f03c6a4e973ef369e15aac88f37981db49e ] During module probe, regulator 'vin' and 'vdd-io' are used and enabled, but the vdd-io regulator overwrites the 'vin' regulator pointer. During remove, only the vdd-io is disabled, as the vin regulator pointer is not available anymore. When regulator_put() is called during resource cleanup a kernel warning is given, as the regulator is still enabled. Store the two regulators in separate pointers and disable both the regulators on module remove. Fixes: 49d22c70aaf0 ("NFC: trf7970a: Add device tree option of 1.8 Volt IO voltage") Signed-off-by: Paul Geurts Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/DB7PR09MB26847A4EBF88D9EDFEB1DA0F950E2@DB7PR09MB2684.eurprd09.prod.outlook.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/nfc/trf7970a.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 21d68664fe082..7968baa626d16 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -424,7 +424,8 @@ struct trf7970a { enum trf7970a_state state; struct device *dev; struct spi_device *spi; - struct regulator *regulator; + struct regulator *vin_regulator; + struct regulator *vddio_regulator; struct nfc_digital_dev *ddev; u32 quirks; bool is_initiator; @@ -1883,7 +1884,7 @@ static int trf7970a_power_up(struct trf7970a *trf) if (trf->state != TRF7970A_ST_PWR_OFF) return 0; - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret); return ret; @@ -1926,7 +1927,7 @@ static int trf7970a_power_down(struct trf7970a *trf) if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) gpiod_set_value_cansleep(trf->en2_gpiod, 0); - ret = regulator_disable(trf->regulator); + ret = regulator_disable(trf->vin_regulator); if (ret) dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__, ret); @@ -2065,37 +2066,37 @@ static int trf7970a_probe(struct spi_device *spi) mutex_init(&trf->lock); INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler); - trf->regulator = devm_regulator_get(&spi->dev, "vin"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vin_regulator = devm_regulator_get(&spi->dev, "vin"); + if (IS_ERR(trf->vin_regulator)) { + ret = PTR_ERR(trf->vin_regulator); dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret); goto err_destroy_lock; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "Can't enable VIN: %d\n", ret); goto err_destroy_lock; } - uvolts = regulator_get_voltage(trf->regulator); + uvolts = regulator_get_voltage(trf->vin_regulator); if (uvolts > 4000000) trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3; - trf->regulator = devm_regulator_get(&spi->dev, "vdd-io"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io"); + if (IS_ERR(trf->vddio_regulator)) { + ret = PTR_ERR(trf->vddio_regulator); dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vddio_regulator); if (ret) { dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - if (regulator_get_voltage(trf->regulator) == 1800000) { + if (regulator_get_voltage(trf->vddio_regulator) == 1800000) { trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW; dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n"); } @@ -2108,7 +2109,7 @@ static int trf7970a_probe(struct spi_device *spi) if (!trf->ddev) { dev_err(trf->dev, "Can't allocate NFC digital device\n"); ret = -ENOMEM; - goto err_disable_regulator; + goto err_disable_vddio_regulator; } nfc_digital_set_parent_dev(trf->ddev, trf->dev); @@ -2137,8 +2138,10 @@ err_shutdown: trf7970a_shutdown(trf); err_free_ddev: nfc_digital_free_device(trf->ddev); -err_disable_regulator: - regulator_disable(trf->regulator); +err_disable_vddio_regulator: + regulator_disable(trf->vddio_regulator); +err_disable_vin_regulator: + regulator_disable(trf->vin_regulator); err_destroy_lock: mutex_destroy(&trf->lock); return ret; @@ -2157,7 +2160,8 @@ static void trf7970a_remove(struct spi_device *spi) nfc_digital_unregister_device(trf->ddev); nfc_digital_free_device(trf->ddev); - regulator_disable(trf->regulator); + regulator_disable(trf->vddio_regulator); + regulator_disable(trf->vin_regulator); mutex_destroy(&trf->lock); } -- GitLab From 0d14f104027e30720582448706c7d6b43065c851 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 19 Apr 2024 10:04:56 +0800 Subject: [PATCH 1921/2290] ax25: Fix netdev refcount issue [ Upstream commit 467324bcfe1a31ec65d0cf4aa59421d6b7a7d52b ] The dev_tracker is added to ax25_cb in ax25_bind(). When the ax25 device is detaching, the dev_tracker of ax25_cb should be deallocated in ax25_kill_by_device() instead of the dev_tracker of ax25_dev. The log reported by ref_tracker is shown below: [ 80.884935] ref_tracker: reference already released. [ 80.885150] ref_tracker: allocated in: [ 80.885349] ax25_dev_device_up+0x105/0x540 [ 80.885730] ax25_device_event+0xa4/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] __dev_notify_flags+0x138/0x280 [ 80.885730] dev_change_flags+0xd7/0x180 [ 80.885730] dev_ifsioc+0x6a9/0xa30 [ 80.885730] dev_ioctl+0x4d8/0xd90 [ 80.885730] sock_do_ioctl+0x1c2/0x2d0 [ 80.885730] sock_ioctl+0x38b/0x4f0 [ 80.885730] __se_sys_ioctl+0xad/0xf0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.885730] ref_tracker: freed in: [ 80.885730] ax25_device_event+0x272/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] dev_close_many+0x272/0x370 [ 80.885730] unregister_netdevice_many_notify+0x3b5/0x1180 [ 80.885730] unregister_netdev+0xcf/0x120 [ 80.885730] sixpack_close+0x11f/0x1b0 [ 80.885730] tty_ldisc_kill+0xcb/0x190 [ 80.885730] tty_ldisc_hangup+0x338/0x3d0 [ 80.885730] __tty_hangup+0x504/0x740 [ 80.885730] tty_release+0x46e/0xd80 [ 80.885730] __fput+0x37f/0x770 [ 80.885730] __x64_sys_close+0x7b/0xb0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.893739] ------------[ cut here ]------------ [ 80.894030] WARNING: CPU: 2 PID: 140 at lib/ref_tracker.c:255 ref_tracker_free+0x47b/0x6b0 [ 80.894297] Modules linked in: [ 80.894929] CPU: 2 PID: 140 Comm: ax25_conn_rel_6 Not tainted 6.9.0-rc4-g8cd26fd90c1a #11 [ 80.895190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qem4 [ 80.895514] RIP: 0010:ref_tracker_free+0x47b/0x6b0 [ 80.895808] Code: 83 c5 18 4c 89 eb 48 c1 eb 03 8a 04 13 84 c0 0f 85 df 01 00 00 41 83 7d 00 00 75 4b 4c 89 ff 9 [ 80.896171] RSP: 0018:ffff888009edf8c0 EFLAGS: 00000286 [ 80.896339] RAX: 1ffff1100141ac00 RBX: 1ffff1100149463b RCX: dffffc0000000000 [ 80.896502] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff88800a0d6518 [ 80.896925] RBP: ffff888009edf9b0 R08: ffff88806d3288d3 R09: 1ffff1100da6511a [ 80.897212] R10: dffffc0000000000 R11: ffffed100da6511b R12: ffff88800a4a31d4 [ 80.897859] R13: ffff88800a4a31d8 R14: dffffc0000000000 R15: ffff88800a0d6518 [ 80.898279] FS: 00007fd88b7fe700(0000) GS:ffff88806d300000(0000) knlGS:0000000000000000 [ 80.899436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 80.900181] CR2: 00007fd88c001d48 CR3: 000000000993e000 CR4: 00000000000006f0 ... [ 80.935774] ref_tracker: sp%d@000000000bb9df3d has 1/1 users at [ 80.935774] ax25_bind+0x424/0x4e0 [ 80.935774] __sys_bind+0x1d9/0x270 [ 80.935774] __x64_sys_bind+0x75/0x80 [ 80.935774] do_syscall_64+0xc4/0x1b0 [ 80.935774] entry_SYSCALL_64_after_hwframe+0x67/0x6f Change ax25_dev->dev_tracker to the dev_tracker of ax25_cb in order to mitigate the bug. Fixes: feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20240419020456.29826-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6b4c25a923774..0bffac238b615 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -103,7 +103,7 @@ again: s->ax25_dev = NULL; if (sk->sk_socket) { netdev_put(ax25_dev->dev, - &ax25_dev->dev_tracker); + &s->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); -- GitLab From 82810873acb43f0a41802f11e200363e40892c9f Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 26 Feb 2024 02:24:52 +0000 Subject: [PATCH 1922/2290] net: make SK_MEMORY_PCPU_RESERV tunable [ Upstream commit 12a686c2e761f1f1f6e6e2117a9ab9c6de2ac8a7 ] This patch adds /proc/sys/net/core/mem_pcpu_rsv sysctl file, to make SK_MEMORY_PCPU_RESERV tunable. Commit 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") introduced per-cpu forward alloc cache: "Implement a per-cpu cache of +1/-1 MB, to reduce number of changes to sk->sk_prot->memory_allocated, which would otherwise be cause of false sharing." sk_prot->memory_allocated points to global atomic variable: atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; If increasing the per-cpu cache size from 1MB to e.g. 16MB, changes to sk->sk_prot->memory_allocated can be further reduced. Performance may be improved on system with many cores. Signed-off-by: Adam Li Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: David S. Miller Stable-dep-of: 3584718cf2ec ("net: fix sk_memory_allocated_{add|sub} vs softirqs") Signed-off-by: Sasha Levin --- Documentation/admin-guide/sysctl/net.rst | 5 +++++ include/net/sock.h | 5 +++-- net/core/sock.c | 1 + net/core/sysctl_net_core.c | 9 +++++++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 6394f5dc2303d..e3894c928118c 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -205,6 +205,11 @@ Will increase power usage. Default: 0 (off) +mem_pcpu_rsv +------------ + +Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU. + rmem_default ------------ diff --git a/include/net/sock.h b/include/net/sock.h index 60577751ea9e8..6ef6ce43a2edc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1483,6 +1483,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) +extern int sysctl_mem_pcpu_rsv; static inline void sk_memory_allocated_add(struct sock *sk, int amt) @@ -1491,7 +1492,7 @@ sk_memory_allocated_add(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -1505,7 +1506,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } diff --git a/net/core/sock.c b/net/core/sock.c index c8803b95ea0da..550af616f5359 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -279,6 +279,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; +int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE; /* Maximal space eaten by iovec or ancillary data plus some space */ int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5b1ce656baa1d..d281d5343ff4a 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -29,6 +29,7 @@ static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -348,6 +349,14 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &min_rcvbuf, }, + { + .procname = "mem_pcpu_rsv", + .data = &sysctl_mem_pcpu_rsv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_mem_pcpu_rsv, + }, { .procname = "dev_weight", .data = &weight_p, -- GitLab From 1e9b694597d44d02ed464a1aa69f5b094c1041d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 17:52:48 +0000 Subject: [PATCH 1923/2290] net: fix sk_memory_allocated_{add|sub} vs softirqs [ Upstream commit 3584718cf2ec7e79b6814f2596dcf398c5fb2eca ] Jonathan Heathcote reported a regression caused by blamed commit on aarch64 architecture. x86 happens to have irq-safe __this_cpu_add_return() and __this_cpu_sub(), but this is not generic. I think my confusion came from "struct sock" argument, because these helpers are called with a locked socket. But the memory accounting is per-proto (and per-cpu after the blamed commit). We might cleanup these helpers later to directly accept a "struct proto *proto" argument. Switch to this_cpu_add_return() and this_cpu_xchg() operations, and get rid of preempt_disable()/preempt_enable() pairs. Fast path becomes a bit faster as a result :) Many thanks to Jonathan Heathcote for his awesome report and investigations. Fixes: 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") Reported-by: Jonathan Heathcote Closes: https://lore.kernel.org/netdev/VI1PR01MB42407D7947B2EA448F1E04EFD10D2@VI1PR01MB4240.eurprd01.prod.exchangelabs.com/ Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240421175248.1692552-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/sock.h | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 6ef6ce43a2edc..77298c74822a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1485,32 +1485,34 @@ sk_memory_allocated(const struct sock *sk) #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) extern int sysctl_mem_pcpu_rsv; +static inline void proto_memory_pcpu_drain(struct proto *proto) +{ + int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); + + if (val) + atomic_long_add(val, proto->memory_allocated); +} + static inline void -sk_memory_allocated_add(struct sock *sk, int amt) +sk_memory_allocated_add(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) +sk_memory_allocated_sub(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 -- GitLab From 7a25bfd12733a8f38f8ca47c581f876c3d481ac0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 18:43:26 +0000 Subject: [PATCH 1924/2290] ipv4: check for NULL idev in ip_route_use_hint() [ Upstream commit 58a4c9b1e5a3e53c9148e80b90e1e43897ce77d1 ] syzbot was able to trigger a NULL deref in fib_validate_source() in an old tree [1]. It appears the bug exists in latest trees. All calls to __in_dev_get_rcu() must be checked for a NULL result. [1] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 2 PID: 3257 Comm: syz-executor.3 Not tainted 5.10.0-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:fib_validate_source+0xbf/0x15a0 net/ipv4/fib_frontend.c:425 Code: 18 f2 f2 f2 f2 42 c7 44 20 23 f3 f3 f3 f3 48 89 44 24 78 42 c6 44 20 27 f3 e8 5d 88 48 fc 4c 89 e8 48 c1 e8 03 48 89 44 24 18 <42> 80 3c 20 00 74 08 4c 89 ef e8 d2 15 98 fc 48 89 5c 24 10 41 bf RSP: 0018:ffffc900015fee40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88800f7a4000 RCX: ffff88800f4f90c0 RDX: 0000000000000000 RSI: 0000000004001eac RDI: ffff8880160c64c0 RBP: ffffc900015ff060 R08: 0000000000000000 R09: ffff88800f7a4000 R10: 0000000000000002 R11: ffff88800f4f90c0 R12: dffffc0000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88800f7a4000 FS: 00007f938acfe6c0(0000) GS:ffff888058c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f938acddd58 CR3: 000000001248e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip_route_use_hint+0x410/0x9b0 net/ipv4/route.c:2231 ip_rcv_finish_core+0x2c4/0x1a30 net/ipv4/ip_input.c:327 ip_list_rcv_finish net/ipv4/ip_input.c:612 [inline] ip_sublist_rcv+0x3ed/0xe50 net/ipv4/ip_input.c:638 ip_list_rcv+0x422/0x470 net/ipv4/ip_input.c:673 __netif_receive_skb_list_ptype net/core/dev.c:5572 [inline] __netif_receive_skb_list_core+0x6b1/0x890 net/core/dev.c:5620 __netif_receive_skb_list net/core/dev.c:5672 [inline] netif_receive_skb_list_internal+0x9f9/0xdc0 net/core/dev.c:5764 netif_receive_skb_list+0x55/0x3e0 net/core/dev.c:5816 xdp_recv_frames net/bpf/test_run.c:257 [inline] xdp_test_run_batch net/bpf/test_run.c:335 [inline] bpf_test_run_xdp_live+0x1818/0x1d00 net/bpf/test_run.c:363 bpf_prog_test_run_xdp+0x81f/0x1170 net/bpf/test_run.c:1376 bpf_prog_test_run+0x349/0x3c0 kernel/bpf/syscall.c:3736 __sys_bpf+0x45c/0x710 kernel/bpf/syscall.c:5115 __do_sys_bpf kernel/bpf/syscall.c:5201 [inline] __se_sys_bpf kernel/bpf/syscall.c:5199 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5199 Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240421184326.1704930-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a0c687ff25987..6c0f1e347b855 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2168,6 +2168,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, int err = -EINVAL; u32 tag = 0; + if (!in_dev) + return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; -- GitLab From 5e5e1865b73ba1628cc0ac92b9d614a489982230 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 19:38:28 +0000 Subject: [PATCH 1925/2290] net: usb: ax88179_178a: stop lying about skb->truesize [ Upstream commit 4ce62d5b2f7aecd4900e7d6115588ad7f9acccca ] Some usb drivers try to set small skb->truesize and break core networking stacks. In this patch, I removed one of the skb->truesize overide. I also replaced one skb_clone() by an allocation of a fresh and small skb, to get minimally sized skbs, like we did in commit 1e2c61172342 ("net: cdc_ncm: reduce skb truesize in rx path") Fixes: f8ebb3ac881b ("net: usb: ax88179_178a: Fix packet receiving") Reported-by: shironeko Closes: https://lore.kernel.org/netdev/c110f41a0d2776b525930f213ca9715c@tesaguri.club/ Signed-off-by: Eric Dumazet Cc: Jose Alonso Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240421193828.1966195-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 3078511f76083..21b6c4d94a632 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1456,21 +1456,16 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Skip IP alignment pseudo header */ skb_pull(skb, 2); - skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); ax88179_rx_checksum(skb, pkt_hdr); return 1; } - ax_skb = skb_clone(skb, GFP_ATOMIC); + ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len); if (!ax_skb) return 0; - skb_trim(ax_skb, pkt_len); + skb_put(ax_skb, pkt_len); + memcpy(ax_skb->data, skb->data + 2, pkt_len); - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - - skb->truesize = pkt_len_plus_padd + - SKB_DATA_ALIGN(sizeof(struct sk_buff)); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); -- GitLab From 25a1c2d4b1fcf938356a9688a96a6456abd44b29 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:39:30 -0400 Subject: [PATCH 1926/2290] net: gtp: Fix Use-After-Free in gtp_dellink [ Upstream commit f2a904107ee2b647bb7794a1a82b67740d7c8a64 ] Since call_rcu, which is called in the hlist_for_each_entry_rcu traversal of gtp_dellink, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 94dc550a5062 ("gtp: fix an use-after-free in ipv4_pdp_find()") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/gtp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7086acfed5b90..05b5914d83582 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1110,11 +1110,12 @@ out_hashtable: static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct hlist_node *next; struct pdp_ctx *pctx; int i; for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) + hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid) pdp_context_delete(pctx); list_del_rcu(>p->list); -- GitLab From 14051cbcf386b1358d687021d855b82a0ffc485f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 16 Apr 2024 15:34:45 -0400 Subject: [PATCH 1927/2290] Bluetooth: MGMT: Fix failing to MGMT_OP_ADD_UUID/MGMT_OP_REMOVE_UUID [ Upstream commit 6eb5fcc416f127f220b9177a5c9ae751cac1cda8 ] These commands don't require the adapter to be up and running so don't use hci_cmd_sync_queue which would check that flag, instead use hci_cmd_sync_submit which would ensure mgmt_class_complete is set properly regardless if any command was actually run or not. Link: https://github.com/bluez/bluez/issues/809 Fixes: d883a4669a1d ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 716f6dc4934b7..4f4b394370bf2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2680,7 +2680,11 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete); + /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) { mgmt_pending_free(cmd); goto failed; @@ -2774,8 +2778,11 @@ update_class: goto unlock; } - err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); @@ -2841,8 +2848,11 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - err = hci_cmd_sync_queue(hdev, set_class_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, set_class_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); -- GitLab From 31f18a1f58117b7da9bdbdfe22e1da424a67b45c Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Wed, 24 Apr 2024 21:59:03 +0800 Subject: [PATCH 1928/2290] Bluetooth: hci_sync: Using hci_cmd_sync_submit when removing Adv Monitor [ Upstream commit 88cd6e6b2d327faa13e4505b07f1e380e51b21ff ] Since the d883a4669a1de be introduced in v6.4, bluetooth daemon got the following failed message of MGMT_OP_REMOVE_ADV_MONITOR command when controller is power-off: bluetoothd[20976]: src/adapter.c:reset_adv_monitors_complete() Failed to reset Adv Monitors: Failed> Normally this situation is happened when the bluetoothd deamon be started manually after system booting. Which means that bluetoothd received MGMT_EV_INDEX_ADDED event after kernel runs hci_power_off(). Base on doc/mgmt-api.txt, the MGMT_OP_REMOVE_ADV_MONITOR command can be used when the controller is not powered. This patch changes the code in remove_adv_monitor() to use hci_cmd_sync_submit() instead of hci_cmd_sync_queue(). Fixes: d883a4669a1de ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Cc: Luiz Augusto von Dentz Cc: Manish Mandlik Cc: Archie Pusaka Cc: Miao-chen Chou Signed-off-by: Chun-Yi Lee Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4f4b394370bf2..76dac5a90aef0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5540,8 +5540,8 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } - err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd, - mgmt_remove_adv_monitor_complete); + err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd, + mgmt_remove_adv_monitor_complete); if (err) { mgmt_pending_remove(cmd); -- GitLab From 4115403dc9aa984a036b5f639b9059d6191b25cc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 24 Apr 2024 14:29:32 +0200 Subject: [PATCH 1929/2290] Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional() [ Upstream commit 3d05fc82237aa97162d0d7dc300b55bb34e91d02 ] Any return value from gpiod_get_optional() other than a pointer to a GPIO descriptor or a NULL-pointer is an error and the driver should abort probing. That being said: commit 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") no longer sets power_ctrl_enabled on NULL-pointer returned by devm_gpiod_get_optional(). Restore this behavior but bail-out on errors. While at it: also bail-out on error returned when trying to get the "swctrl" GPIO. Reported-by: Wren Turkal Reported-by: Zijun Hu Closes: https://lore.kernel.org/linux-bluetooth/1713449192-25926-2-git-send-email-quic_zijuhu@quicinc.com/ Fixes: 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski Tested-by: Wren Turkal Reported-by: Wren Turkal Reported-by: Zijun Hu Reviewed-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 33956ddec9337..ca6065297a7b2 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2257,16 +2257,21 @@ static int qca_serdev_probe(struct serdev_device *serdev) (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); - power_ctrl_enabled = false; + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); if (IS_ERR(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855 || - data->soc_type == QCA_WCN7850)) - dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + data->soc_type == QCA_WCN7850)) { + dev_err(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + return PTR_ERR(qcadev->sw_ctrl); + } qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { @@ -2285,10 +2290,13 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(qcadev->bt_en)) { - dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); - power_ctrl_enabled = false; + dev_err(&serdev->dev, "failed to acquire enable gpio\n"); + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { dev_warn(&serdev->dev, "failed to acquire clk\n"); -- GitLab From f4861f052f2d4daa76379abae66fbe2c14f48ec4 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Sun, 21 Apr 2024 16:22:32 +0200 Subject: [PATCH 1930/2290] ipvs: Fix checksumming on GSO of SCTP packets [ Upstream commit e10d3ba4d434ed172914617ed8d74bd411421193 ] It was observed in the wild that pairs of consecutive packets would leave the IPVS with the same wrong checksum, and the issue only went away when disabling GSO. IPVS needs to avoid computing the SCTP checksum when using GSO. Fixes: 90017accff61 ("sctp: Add GSO support") Co-developed-by: Firo Yang Signed-off-by: Ismael Luceno Tested-by: Andreas Taschner Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a0921adc31a9f..1e689c7141271 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; } -- GitLab From bca6fa2d9a9f560e6b89fd5190b05cc2f5d422c1 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:37:17 -0400 Subject: [PATCH 1931/2290] net: openvswitch: Fix Use-After-Free in ovs_ct_exit [ Upstream commit 5ea7b72d4fac2fdbc0425cd8f2ea33abe95235b2 ] Since kfree_rcu, which is called in the hlist_for_each_entry_rcu traversal of ovs_ct_limit_exit, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Reviewed-by: Aaron Conole Link: https://lore.kernel.org/r/ZiYvzQN/Ry5oeFQW@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e4ba86b84b9b1..2302bae1e0128 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1920,9 +1920,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; + struct hlist_node *next; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node, - lockdep_ovsl_is_held()) + hlist_for_each_entry_safe(ct_limit, next, head, hlist_node) kfree_rcu(ct_limit, rcu); } kfree(info->limits); -- GitLab From 19ebdce6609e8cc462ff1f8478c09b39bf25c159 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:55 +0200 Subject: [PATCH 1932/2290] mlxsw: spectrum_acl_tcam: Fix race during rehash delayed work [ Upstream commit d90cfe20562407d9f080d24123078d666d730707 ] The purpose of the rehash delayed work is to reduce the number of masks (eRPs) used by an ACL region as the eRP bank is a global and limited resource. This is done in three steps: 1. Creating a new set of masks and a new ACL region which will use the new masks and to which the existing filters will be migrated to. The new region is assigned to 'vregion->region' and the region from which the filters are migrated from is assigned to 'vregion->region2'. 2. Migrating all the filters from the old region to the new region. 3. Destroying the old region and setting 'vregion->region2' to NULL. Only the second steps is performed under the 'vregion->lock' mutex although its comments says that among other things it "Protects consistency of region, region2 pointers". This is problematic as the first step can race with filter insertion from user space that uses 'vregion->region', but under the mutex. Fix by holding the mutex across the entirety of the delayed work and not only during the second step. Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1ec1d54edf2bad0a369e6b4fa030aba64e1f124b.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 41eac7dfb67e7..508c0b1b80fd9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -780,7 +780,9 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) rehash.dw.work); int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + mutex_lock(&vregion->lock); mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + mutex_unlock(&vregion->lock); if (credits < 0) /* Rehash gone out of credits so it was interrupted. * Schedule the work as soon as possible to continue. @@ -1420,7 +1422,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, int err, err2; trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); - mutex_lock(&vregion->lock); err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { @@ -1440,7 +1441,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, /* Let the rollback to be continued later on. */ } } - mutex_unlock(&vregion->lock); trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); return err; } -- GitLab From b996e8699da810e4c915841d6aaef761007f933a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:56 +0200 Subject: [PATCH 1933/2290] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during activity update [ Upstream commit 79b5b4b18bc85b19d3a518483f9abbbe6d7b3ba4 ] The rule activity update delayed work periodically traverses the list of configured rules and queries their activity from the device. As part of this task it accesses the entry pointed by 'ventry->entry', but this entry can be changed concurrently by the rehash delayed work, leading to a use-after-free [1]. Fix by closing the race and perform the activity query under the 'vregion->lock' mutex. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 Read of size 8 at addr ffff8881054ed808 by task kworker/0:18/181 CPU: 0 PID: 181 Comm: kworker/0:18 Not tainted 6.9.0-rc2-custom-00781-gd5ab772d32f7 #2 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_rule_activity_update_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 mlxsw_sp_acl_rule_activity_update_work+0x219/0x400 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_entry_create+0x7b/0x1f0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x30d/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3d7/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1fcce0a60b231ebeb2515d91022284ba7b4ffe7a.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 508c0b1b80fd9..8cbce127d231d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1256,8 +1256,14 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, bool *activity) { - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, - ventry->entry, activity); + struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion; + int err; + + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry, + activity); + mutex_unlock(&vregion->lock); + return err; } static int -- GitLab From 813e2ab753a8f8c243a39ede20c2e0adc15f3887 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:57 +0200 Subject: [PATCH 1934/2290] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash [ Upstream commit 54225988889931467a9b55fdbef534079b665519 ] The rehash delayed work migrates filters from one region to another according to the number of available credits. The migrated from region is destroyed at the end of the work if the number of credits is non-negative as the assumption is that this is indicative of migration being complete. This assumption is incorrect as a non-negative number of credits can also be the result of a failed migration. The destruction of a region that still has filters referencing it can result in a use-after-free [1]. Fix by not destroying the region if migration failed. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 Read of size 8 at addr ffff8881735319e8 by task kworker/0:31/3858 CPU: 0 PID: 3858 Comm: kworker/0:31 Tainted: G W 6.9.0-rc2-custom-00782-gf2275c2157d8 #5 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 mlxsw_sp_acl_ctcam_entry_del+0x2e/0x70 mlxsw_sp_acl_atcam_entry_del+0x81/0x210 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3cd/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 174: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_region_create+0xdf/0x9c0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x954/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 7: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_region_destroy+0x272/0x310 mlxsw_sp_acl_tcam_vregion_rehash_work+0x731/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3e412b5659ec2310c5c615760dfe5eac18dd7ebd.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 8cbce127d231d..44c750e1025ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1548,6 +1548,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, ctx, credits); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + return; } if (*credits >= 0) -- GitLab From 78884187c09fbd41b6c0a4d1a0da6571df381ddb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:58 +0200 Subject: [PATCH 1935/2290] mlxsw: spectrum_acl_tcam: Rate limit error message [ Upstream commit 5bcf925587e9b5d36420d572a0b4d131c90fb306 ] In the rare cases when the device resources are exhausted it is likely that the rehash delayed work will fail. An error message will be printed whenever this happens which can be overwhelming considering the fact that the work is per-region and that there can be hundreds of regions. Fix by rate limiting the error message. Fixes: e5e7962ee5c2 ("mlxsw: spectrum_acl: Implement region migration according to hints") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/c510763b2ebd25e7990d80183feff91cde593145.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 44c750e1025ac..b0396cbf3cce8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1547,7 +1547,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, ctx, credits); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); return; } -- GitLab From b822644fd90992ee362c5e0c8d2556efc8856c76 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:59 +0200 Subject: [PATCH 1936/2290] mlxsw: spectrum_acl_tcam: Fix memory leak during rehash [ Upstream commit 8ca3f7a7b61393804c46f170743c3b839df13977 ] The rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. If the migration fails, the code tries to migrate the filters back to the old region. However, the rollback itself can also fail in which case another migration will be erroneously performed. Besides the fact that this ping pong is not a very good idea, it also creates a problem. Each virtual chunk references two chunks: The currently used one ('vchunk->chunk') and a backup ('vchunk->chunk2'). During migration the first holds the chunk we want to migrate filters to and the second holds the chunk we are migrating filters from. The code currently assumes - but does not verify - that the backup chunk does not exist (NULL) if the currently used chunk does not reference the target region. This assumption breaks when we are trying to rollback a rollback, resulting in the backup chunk being overwritten and leaked [1]. Fix by not rolling back a failed rollback and add a warning to avoid future cases. [1] WARNING: CPU: 5 PID: 1063 at lib/parman.c:291 parman_destroy+0x17/0x20 Modules linked in: CPU: 5 PID: 1063 Comm: kworker/5:11 Tainted: G W 6.9.0-rc2-custom-00784-gc6a05c468a0b #14 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:parman_destroy+0x17/0x20 [...] Call Trace: mlxsw_sp_acl_atcam_region_fini+0x19/0x60 mlxsw_sp_acl_tcam_region_destroy+0x49/0xf0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x1f1/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 843500518509 ("mlxsw: spectrum_acl: Do rollback as another call to mlxsw_sp_acl_tcam_vchunk_migrate_all()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/d5edd4f4503934186ae5cfe268503b16345b4e0f.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index b0396cbf3cce8..adaad9fc5fa50 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1297,6 +1297,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_chunk *new_chunk; + WARN_ON(vchunk->chunk2); + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); if (IS_ERR(new_chunk)) return PTR_ERR(new_chunk); @@ -1431,6 +1433,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { + if (ctx->this_is_rollback) + return err; /* In case migration was not successful, we need to swap * so the original region pointer is assigned again * to vregion->region. -- GitLab From 751d352858108314efd33dddd5a9a2b6bf7d6916 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:00 +0200 Subject: [PATCH 1937/2290] mlxsw: spectrum_acl_tcam: Fix warning during rehash [ Upstream commit 743edc8547a92b6192aa1f1b6bb78233fa21dc9b ] As previously explained, the rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. When the work runs out of credits it stores the current chunk and entry as markers in the per-work context so that it would know where to resume the migration from the next time the work is scheduled. Upon error, the chunk marker is reset to NULL, but without resetting the entry markers despite being relative to it. This can result in migration being resumed from an entry that does not belong to the chunk being migrated. In turn, this will eventually lead to a chunk being iterated over as if it is an entry. Because of how the two structures happen to be defined, this does not lead to KASAN splats, but to warnings such as [1]. Fix by creating a helper that resets all the markers and call it from all the places the currently only reset the chunk marker. For good measures also call it when starting a completely new rehash. Add a warning to avoid future cases. [1] WARNING: CPU: 7 PID: 1076 at drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c:407 mlxsw_afk_encode+0x242/0x2f0 Modules linked in: CPU: 7 PID: 1076 Comm: kworker/7:24 Tainted: G W 6.9.0-rc3-custom-00880-g29e61d91b77b #29 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_afk_encode+0x242/0x2f0 [...] Call Trace: mlxsw_sp_acl_atcam_entry_add+0xd9/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x109/0x290 mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cc17eed86b41dd829d39b07906fec074a9ce580e.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index adaad9fc5fa50..1a6c774c8b7b0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -792,6 +792,17 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); } +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + /* The entry markers are relative to the current chunk and therefore + * needs to be reset together with the chunk marker. + */ + ctx->current_vchunk = NULL; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; +} + static void mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) { @@ -814,7 +825,7 @@ mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *v * the current chunk pointer to make sure all chunks * are properly migrated. */ - vregion->rehash.ctx.current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx); } static struct mlxsw_sp_acl_tcam_vregion * @@ -1317,7 +1328,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); vchunk->chunk2 = NULL; - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); } static int @@ -1349,6 +1360,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, ventry = list_first_entry(&vchunk->ventry_list, typeof(*ventry), list); + WARN_ON(ventry->vchunk != vchunk); + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { /* During rollback, once we reach the ventry that failed * to migrate, we are done. @@ -1440,7 +1453,7 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, * to vregion->region. */ swap(vregion->region, vregion->region2); - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); ctx->this_is_rollback = true; err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); @@ -1499,6 +1512,7 @@ mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, ctx->hints_priv = hints_priv; ctx->this_is_rollback = false; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); return 0; -- GitLab From 4526a56e02da3725db979358964df9cd9c567154 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:01 +0200 Subject: [PATCH 1938/2290] mlxsw: spectrum_acl_tcam: Fix incorrect list API usage [ Upstream commit b377add0f0117409c418ddd6504bd682ebe0bf79 ] Both the function that migrates all the chunks within a region and the function that migrates all the entries within a chunk call list_first_entry() on the respective lists without checking that the lists are not empty. This is incorrect usage of the API, which leads to the following warning [1]. Fix by returning if the lists are empty as there is nothing to migrate in this case. [1] WARNING: CPU: 0 PID: 6437 at drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c:1266 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0> Modules linked in: CPU: 0 PID: 6437 Comm: kworker/0:37 Not tainted 6.9.0-rc3-custom-00883-g94a65f079ef6 #39 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0x2c0 [...] Call Trace: mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x4a0 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/4628e9a22d1d84818e28310abbbc498e7bc31bc9.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 1a6c774c8b7b0..d0c7cb059616c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1351,6 +1351,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, return 0; } + if (list_empty(&vchunk->ventry_list)) + goto out; + /* If the migration got interrupted, we have the ventry to start from * stored in context. */ @@ -1402,6 +1405,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, } } +out: mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); return 0; } @@ -1415,6 +1419,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; + if (list_empty(&vregion->vchunk_list)) + return 0; + /* If the migration got interrupted, we have the vchunk * we are working on stored in context. */ -- GitLab From 5bfe7bf9656ed2633718388f12b7c38b86414a04 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:02 +0200 Subject: [PATCH 1939/2290] mlxsw: spectrum_acl_tcam: Fix memory leak when canceling rehash work [ Upstream commit fb4e2b70a7194b209fc7320bbf33b375f7114bd5 ] The rehash delayed work is rescheduled with a delay if the number of credits at end of the work is not negative as supposedly it means that the migration ended. Otherwise, it is rescheduled immediately. After "mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash" the above is no longer accurate as a non-negative number of credits is no longer indicative of the migration being done. It can also happen if the work encountered an error in which case the migration will resume the next time the work is scheduled. The significance of the above is that it is possible for the work to be pending and associated with hints that were allocated when the migration started. This leads to the hints being leaked [1] when the work is canceled while pending as part of ACL region dismantle. Fix by freeing the hints if hints are associated with a work that was canceled while pending. Blame the original commit since the reliance on not having a pending work associated with hints is fragile. [1] unreferenced object 0xffff88810e7c3000 (size 256): comm "kworker/0:16", pid 176, jiffies 4295460353 hex dump (first 32 bytes): 00 30 95 11 81 88 ff ff 61 00 00 00 00 00 00 80 .0......a....... 00 00 61 00 40 00 00 00 00 00 00 00 04 00 00 00 ..a.@........... backtrace (crc 2544ddb9): [<00000000cf8cfab3>] kmalloc_trace+0x23f/0x2a0 [<000000004d9a1ad9>] objagg_hints_get+0x42/0x390 [<000000000b143cf3>] mlxsw_sp_acl_erp_rehash_hints_get+0xca/0x400 [<0000000059bdb60a>] mlxsw_sp_acl_tcam_vregion_rehash_work+0x868/0x1160 [<00000000e81fd734>] process_one_work+0x59c/0xf20 [<00000000ceee9e81>] worker_thread+0x799/0x12c0 [<00000000bda6fe39>] kthread+0x246/0x300 [<0000000070056d23>] ret_from_fork+0x34/0x70 [<00000000dea2b93e>] ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0cc12ebb07c4d4c41a1265ee2c28b392ff997a86.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index d0c7cb059616c..685bcf8cbfa9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -898,10 +898,14 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + mutex_lock(&tcam->lock); list_del(&vregion->tlist); mutex_unlock(&tcam->lock); - cancel_delayed_work_sync(&vregion->rehash.dw); + if (cancel_delayed_work_sync(&vregion->rehash.dw) && + ctx->hints_priv) + ops->region_rehash_hints_put(ctx->hints_priv); } mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); if (vregion->region2) -- GitLab From e32535744043fe9442bc3ce739492440414f3e76 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 23 Apr 2024 17:21:48 -0700 Subject: [PATCH 1940/2290] eth: bnxt: fix counting packets discarded due to OOM and netpoll [ Upstream commit 730117730709992c9f6535dd7b47638ee561ec45 ] I added OOM and netpoll discard counters, naively assuming that the cpr pointer is pointing to a common completion ring. Turns out that is usually *a* completion ring but not *the* completion ring which bnapi->cp_ring points to. bnapi->cp_ring is where the stats are read from, so we end up reporting 0 thru ethtool -S and qstat even though the drop events have happened. Make 100% sure we're recording statistics in the correct structure. Fixes: 907fd4a294db ("bnxt: count discards due to memory allocation errors") Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240424002148.3937059-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 44 ++++++++++------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 70021b5eb54a6..77ea19bcdc6fe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1697,7 +1697,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } else { @@ -1707,7 +1707,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } @@ -1723,7 +1723,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (!skb) { skb_free_frag(data); bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } skb_reserve(skb, bp->rx_offset); @@ -1734,7 +1734,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). */ - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } @@ -1950,11 +1950,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, cp_cons, agg_bufs, false); - if (!frag_len) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!frag_len) + goto oom_next_rx; } xdp_active = true; } @@ -1977,9 +1974,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else bnxt_xdp_buff_frags_free(rxr, &xdp); } - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } else { u32 payload; @@ -1990,29 +1985,21 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, payload = 0; skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr, payload | len); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } if (agg_bufs) { if (!xdp_active) { skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } else { skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } } @@ -2090,6 +2077,11 @@ next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; return rc; + +oom_next_rx: + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; } /* In netpoll mode, if we are using a combined completion ring, we need to @@ -2135,7 +2127,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp, } rc = bnxt_rx_pkt(bp, cpr, raw_cons, event); if (rc && rc != -EBUSY) - cpr->sw_stats.rx.rx_netpoll_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1; return rc; } -- GitLab From 13ba94f6cc820fdea15efeaa17d4c722874eebf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Apr 2024 20:45:01 +0200 Subject: [PATCH 1941/2290] netfilter: nf_tables: honor table dormant flag from netdev release event path [ Upstream commit 8e30abc9ace4f0add4cd761dfdbfaebae5632dd2 ] Check for table dormant flag otherwise netdev release event path tries to unregister an already unregistered hook. [524854.857999] ------------[ cut here ]------------ [524854.858010] WARNING: CPU: 0 PID: 3386599 at net/netfilter/core.c:501 __nf_unregister_net_hook+0x21a/0x260 [...] [524854.858848] CPU: 0 PID: 3386599 Comm: kworker/u32:2 Not tainted 6.9.0-rc3+ #365 [524854.858869] Workqueue: netns cleanup_net [524854.858886] RIP: 0010:__nf_unregister_net_hook+0x21a/0x260 [524854.858903] Code: 24 e8 aa 73 83 ff 48 63 43 1c 83 f8 01 0f 85 3d ff ff ff e8 98 d1 f0 ff 48 8b 3c 24 e8 8f 73 83 ff 48 63 43 1c e9 26 ff ff ff <0f> 0b 48 83 c4 18 48 c7 c7 00 68 e9 82 5b 5d 41 5c 41 5d 41 5e 41 [524854.858914] RSP: 0018:ffff8881e36d79e0 EFLAGS: 00010246 [524854.858926] RAX: 0000000000000000 RBX: ffff8881339ae790 RCX: ffffffff81ba524a [524854.858936] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff8881c8a16438 [524854.858945] RBP: ffff8881c8a16438 R08: 0000000000000001 R09: ffffed103c6daf34 [524854.858954] R10: ffff8881e36d79a7 R11: 0000000000000000 R12: 0000000000000005 [524854.858962] R13: ffff8881c8a16000 R14: 0000000000000000 R15: ffff8881351b5a00 [524854.858971] FS: 0000000000000000(0000) GS:ffff888390800000(0000) knlGS:0000000000000000 [524854.858982] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [524854.858991] CR2: 00007fc9be0f16f4 CR3: 00000001437cc004 CR4: 00000000001706f0 [524854.859000] Call Trace: [524854.859006] [524854.859013] ? __warn+0x9f/0x1a0 [524854.859027] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859044] ? report_bug+0x1b1/0x1e0 [524854.859060] ? handle_bug+0x3c/0x70 [524854.859071] ? exc_invalid_op+0x17/0x40 [524854.859083] ? asm_exc_invalid_op+0x1a/0x20 [524854.859100] ? __nf_unregister_net_hook+0x6a/0x260 [524854.859116] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859135] nf_tables_netdev_event+0x337/0x390 [nf_tables] [524854.859304] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859461] ? packet_notifier+0xb3/0x360 [524854.859476] ? _raw_spin_unlock_irqrestore+0x11/0x40 [524854.859489] ? dcbnl_netdevice_event+0x35/0x140 [524854.859507] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859661] notifier_call_chain+0x7d/0x140 [524854.859677] unregister_netdevice_many_notify+0x5e1/0xae0 Fixes: d54725cd11a5 ("netfilter: nf_tables: support for multiple devices per netdev hook") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_chain_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 274b6f7e6bb57..d170758a1eb5d 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -338,7 +338,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; if (n > 1) { - nf_unregister_net_hook(ctx->net, &found->ops); + if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(ctx->net, &found->ops); + list_del_rcu(&found->list); kfree_rcu(found, rcu); return; -- GitLab From 152ed360cf2d273f88fc99a518b7eb868aae2939 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 23 Apr 2024 11:27:17 -0700 Subject: [PATCH 1942/2290] i40e: Do not use WQ_MEM_RECLAIM flag for workqueue [ Upstream commit 2cc7d150550cc981aceedf008f5459193282425c ] Issue reported by customer during SRIOV testing, call trace: When both i40e and the i40iw driver are loaded, a warning in check_flush_dependency is being triggered. This seems to be because of the i40e driver workqueue is allocated with the WQ_MEM_RECLAIM flag, and the i40iw one is not. Similar error was encountered on ice too and it was fixed by removing the flag. Do the same for i40e too. [Feb 9 09:08] ------------[ cut here ]------------ [ +0.000004] workqueue: WQ_MEM_RECLAIM i40e:i40e_service_task [i40e] is flushing !WQ_MEM_RECLAIM infiniband:0x0 [ +0.000060] WARNING: CPU: 0 PID: 937 at kernel/workqueue.c:2966 check_flush_dependency+0x10b/0x120 [ +0.000007] Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_timer snd_seq_device snd soundcore nls_utf8 cifs cifs_arc4 nls_ucs2_utils rdma_cm iw_cm ib_cm cifs_md4 dns_resolver netfs qrtr rfkill sunrpc vfat fat intel_rapl_msr intel_rapl_common irdma intel_uncore_frequency intel_uncore_frequency_common ice ipmi_ssif isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp gnss coretemp ib_uverbs rapl intel_cstate ib_core iTCO_wdt iTCO_vendor_support acpi_ipmi mei_me ipmi_si intel_uncore ioatdma i2c_i801 joydev pcspkr mei ipmi_devintf lpc_ich intel_pch_thermal i2c_smbus ipmi_msghandler acpi_power_meter acpi_pad xfs libcrc32c ast sd_mod drm_shmem_helper t10_pi drm_kms_helper sg ixgbe drm i40e ahci crct10dif_pclmul libahci crc32_pclmul igb crc32c_intel libata ghash_clmulni_intel i2c_algo_bit mdio dca wmi dm_mirror dm_region_hash dm_log dm_mod fuse [ +0.000050] CPU: 0 PID: 937 Comm: kworker/0:3 Kdump: loaded Not tainted 6.8.0-rc2-Feb-net_dev-Qiueue-00279-gbd43c5687e05 #1 [ +0.000003] Hardware name: Intel Corporation S2600BPB/S2600BPB, BIOS SE5C620.86B.02.01.0013.121520200651 12/15/2020 [ +0.000001] Workqueue: i40e i40e_service_task [i40e] [ +0.000024] RIP: 0010:check_flush_dependency+0x10b/0x120 [ +0.000003] Code: ff 49 8b 54 24 18 48 8d 8b b0 00 00 00 49 89 e8 48 81 c6 b0 00 00 00 48 c7 c7 b0 97 fa 9f c6 05 8a cc 1f 02 01 e8 35 b3 fd ff <0f> 0b e9 10 ff ff ff 80 3d 78 cc 1f 02 00 75 94 e9 46 ff ff ff 90 [ +0.000002] RSP: 0018:ffffbd294976bcf8 EFLAGS: 00010282 [ +0.000002] RAX: 0000000000000000 RBX: ffff94d4c483c000 RCX: 0000000000000027 [ +0.000001] RDX: ffff94d47f620bc8 RSI: 0000000000000001 RDI: ffff94d47f620bc0 [ +0.000001] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffff7fff [ +0.000001] R10: ffffbd294976bb98 R11: ffffffffa0be65e8 R12: ffff94c5451ea180 [ +0.000001] R13: ffff94c5ab5e8000 R14: ffff94c5c20b6e05 R15: ffff94c5f1330ab0 [ +0.000001] FS: 0000000000000000(0000) GS:ffff94d47f600000(0000) knlGS:0000000000000000 [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007f9e6f1fca70 CR3: 0000000038e20004 CR4: 00000000007706f0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.000001] PKRU: 55555554 [ +0.000001] Call Trace: [ +0.000001] [ +0.000002] ? __warn+0x80/0x130 [ +0.000003] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? report_bug+0x195/0x1a0 [ +0.000005] ? handle_bug+0x3c/0x70 [ +0.000003] ? exc_invalid_op+0x14/0x70 [ +0.000002] ? asm_exc_invalid_op+0x16/0x20 [ +0.000006] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? check_flush_dependency+0x10b/0x120 [ +0.000002] __flush_workqueue+0x126/0x3f0 [ +0.000015] ib_cache_cleanup_one+0x1c/0xe0 [ib_core] [ +0.000056] __ib_unregister_device+0x6a/0xb0 [ib_core] [ +0.000023] ib_unregister_device_and_put+0x34/0x50 [ib_core] [ +0.000020] i40iw_close+0x4b/0x90 [irdma] [ +0.000022] i40e_notify_client_of_netdev_close+0x54/0xc0 [i40e] [ +0.000035] i40e_service_task+0x126/0x190 [i40e] [ +0.000024] process_one_work+0x174/0x340 [ +0.000003] worker_thread+0x27e/0x390 [ +0.000001] ? __pfx_worker_thread+0x10/0x10 [ +0.000002] kthread+0xdf/0x110 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000002] ret_from_fork+0x2d/0x50 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000001] ret_from_fork_asm+0x1b/0x30 [ +0.000004] [ +0.000001] ---[ end trace 0000000000000000 ]--- Fixes: 4d5957cbdecd ("i40e: remove WQ_UNBOUND and the task limit of our workqueue") Signed-off-by: Sindhu Devale Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Mateusz Polchlopek Signed-off-by: Aleksandr Loktionov Tested-by: Robert Ganzynkowicz Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a9db1ed74d3fc..d08d41545daeb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16716,7 +16716,7 @@ static int __init i40e_init_module(void) * since we need to be able to guarantee forward progress even under * memory pressure. */ - i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name); + i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; -- GitLab From 81ad28ac21b7d489a44a399351ef139330397c78 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 23 Apr 2024 11:27:18 -0700 Subject: [PATCH 1943/2290] i40e: Report MFS in decimal base instead of hex [ Upstream commit ef3c313119ea448c22da10366faa26b5b4b1a18e ] If the MFS is set below the default (0x2600), a warning message is reported like the following : MFS for port 1 has been set below the default: 600 This message is a bit confusing as the number shown here (600) is in fact an hexa number: 0x600 = 1536 Without any explicit "0x" prefix, this message is read like the MFS is set to 600 bytes. MFS, as per MTUs, are usually expressed in decimal base. This commit reports both current and default MFS values in decimal so it's less confusing for end-users. A typical warning message looks like the following : MFS for port 1 (1536) has been set below the default (9728) Signed-off-by: Erwan Velu Reviewed-by: Simon Horman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Link: https://lore.kernel.org/r/20240423182723.740401-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d08d41545daeb..9efd4b962dce2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16173,8 +16173,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) val = (rd32(&pf->hw, I40E_PRTGL_SAH) & I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - pf->hw.port, val); + dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", + pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out -- GitLab From 777d7d0049ff055eb04b65953caf2095c8806d7b Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Tue, 23 Apr 2024 11:27:19 -0700 Subject: [PATCH 1944/2290] iavf: Fix TC config comparison with existing adapter TC config [ Upstream commit 54976cf58d6168b8d15cebb395069f23b2f34b31 ] Same number of TCs doesn't imply that underlying TC configs are same. The config could be different due to difference in number of queues in each TC. Add utility function to determine if TC configs are same. Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Sudheer Mogilappagari Tested-by: Mineri Bhange (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b9c4b311cd625..53b9fe35d8035 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3631,6 +3631,34 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) spin_unlock_bh(&adapter->cloud_filter_list_lock); } +/** + * iavf_is_tc_config_same - Compare the mqprio TC config with the + * TC config already configured on this adapter. + * @adapter: board private structure + * @mqprio_qopt: TC config received from kernel. + * + * This function compares the TC config received from the kernel + * with the config already configured on the adapter. + * + * Return: True if configuration is same, false otherwise. + **/ +static bool iavf_is_tc_config_same(struct iavf_adapter *adapter, + struct tc_mqprio_qopt *mqprio_qopt) +{ + struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0]; + int i; + + if (adapter->num_tc != mqprio_qopt->num_tc) + return false; + + for (i = 0; i < adapter->num_tc; i++) { + if (ch[i].count != mqprio_qopt->count[i] || + ch[i].offset != mqprio_qopt->offset[i]) + return false; + } + return true; +} + /** * __iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure @@ -3688,7 +3716,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) if (ret) return ret; /* Return if same TC config is requested */ - if (adapter->num_tc == num_tc) + if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt)) return 0; adapter->num_tc = num_tc; -- GitLab From f05caed83394c8354bbceea9c90c15f648eec3cd Mon Sep 17 00:00:00 2001 From: Jason Reeder Date: Wed, 24 Apr 2024 12:46:26 +0530 Subject: [PATCH 1945/2290] net: ethernet: ti: am65-cpts: Fix PTPv1 message type on TX packets [ Upstream commit 1b9e743e923b256e353a9a644195372285e5a6c0 ] The CPTS, by design, captures the messageType (Sync, Delay_Req, etc.) field from the second nibble of the PTP header which is defined in the PTPv2 (1588-2008) specification. In the PTPv1 (1588-2002) specification the first two bytes of the PTP header are defined as the versionType which is always 0x0001. This means that any PTPv1 packets that are tagged for TX timestamping by the CPTS will have their messageType set to 0x0 which corresponds to a Sync message type. This causes issues when a PTPv1 stack is expecting a Delay_Req (messageType: 0x1) timestamp that never appears. Fix this by checking if the ptp_class of the timestamped TX packet is PTP_CLASS_V1 and then matching the PTP sequence ID to the stored sequence ID in the skb->cb data structure. If the sequence IDs match and the packet is of type PTPv1 then there is a chance that the messageType has been incorrectly stored by the CPTS so overwrite the messageType stored by the CPTS with the messageType from the skb->cb data structure. This allows the PTPv1 stack to receive TX timestamps for Delay_Req packets which are necessary to lock onto a PTP Leader. Signed-off-by: Jason Reeder Signed-off-by: Ravi Gunasekaran Tested-by: Ed Trexel Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver") Link: https://lore.kernel.org/r/20240424071626.32558-1-r-gunasekaran@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/am65-cpts.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 9948ac14e68db..c1bdf045e9815 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -649,6 +649,11 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + if ((ptp_classify_raw(skb) & PTP_CLASS_V1) && + ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) == + (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK))) + mtype_seqid = skb_cb->skb_mtype_seqid; + if (mtype_seqid == skb_cb->skb_mtype_seqid) { u64 ns = event->timestamp; -- GitLab From 2ceacda2709eab9c11ec3247031b2572af4c44ef Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Apr 2024 10:04:43 -0700 Subject: [PATCH 1946/2290] af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc(). [ Upstream commit 1971d13ffa84a551d29a81fdf5b5ec5be166ac83 ] syzbot reported a lockdep splat regarding unix_gc_lock and unix_state_lock(). One is called from recvmsg() for a connected socket, and another is called from GC for TCP_LISTEN socket. So, the splat is false-positive. Let's add a dedicated lock class for the latter to suppress the splat. Note that this change is not necessary for net-next.git as the issue is only applied to the old GC impl. [0]: WARNING: possible circular locking dependency detected 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Not tainted ----------------------------------------------------- kworker/u8:1/11 is trying to acquire lock: ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 but task is already holding lock: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (unix_gc_lock){+.+.}-{2:2}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] unix_notinflight+0x13d/0x390 net/unix/garbage.c:140 unix_detach_fds net/unix/af_unix.c:1819 [inline] unix_destruct_scm+0x221/0x350 net/unix/af_unix.c:1876 skb_release_head_state+0x100/0x250 net/core/skbuff.c:1188 skb_release_all net/core/skbuff.c:1200 [inline] __kfree_skb net/core/skbuff.c:1216 [inline] kfree_skb_reason+0x16d/0x3b0 net/core/skbuff.c:1252 kfree_skb include/linux/skbuff.h:1262 [inline] manage_oob net/unix/af_unix.c:2672 [inline] unix_stream_read_generic+0x1125/0x2700 net/unix/af_unix.c:2749 unix_stream_splice_read+0x239/0x320 net/unix/af_unix.c:2981 do_splice_read fs/splice.c:985 [inline] splice_file_to_pipe+0x299/0x500 fs/splice.c:1295 do_splice+0xf2d/0x1880 fs/splice.c:1379 __do_splice fs/splice.c:1436 [inline] __do_sys_splice fs/splice.c:1652 [inline] __se_sys_splice+0x331/0x4a0 fs/splice.c:1634 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&u->lock){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(unix_gc_lock); lock(&u->lock); lock(unix_gc_lock); lock(&u->lock); *** DEADLOCK *** 3 locks held by kworker/u8:1/11: #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x17c0 kernel/workqueue.c:3335 #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x17c0 kernel/workqueue.c:3335 #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 stack backtrace: CPU: 0 PID: 11 Comm: kworker/u8:1 Not tainted 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Workqueue: events_unbound __unix_gc Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Reported-and-tested-by: syzbot+fa379358c28cc87cc307@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa379358c28cc87cc307 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424170443.9832-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/af_unix.h | 3 +++ net/unix/garbage.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 16d6936baa2fb..e7d71a516bd4d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -79,6 +79,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. + */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 85c6f05c0fa3c..d2fc795394a52 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -260,7 +260,7 @@ void unix_gc(void) __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); if (sk->sk_state == TCP_LISTEN) { - unix_state_lock(sk); + unix_state_lock_nested(sk, U_LOCK_GC_LISTENER); unix_state_unlock(sk); } } -- GitLab From b3686200adba26dd1f8beee3d9c1b34563db1e65 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Feb 2023 16:09:45 -0800 Subject: [PATCH 1947/2290] cifs: Replace remaining 1-element arrays commit 35235e19b393b54db0e0d7c424d658ba45f20468 upstream. The kernel is globally removing the ambiguous 0-length and 1-element arrays in favor of flexible arrays, so that we can gain both compile-time and run-time array bounds checking[1]. Replace the trailing 1-element array with a flexible array in the following structures: struct cifs_spnego_msg struct cifs_quota_data struct get_dfs_referral_rsp struct file_alt_name_info NEGOTIATE_RSP SESSION_SETUP_ANDX TCONX_REQ TCONX_RSP TCONX_RSP_EXT ECHO_REQ ECHO_RSP OPEN_REQ OPENX_REQ LOCK_REQ RENAME_REQ COPY_REQ COPY_RSP NT_RENAME_REQ DELETE_FILE_REQ DELETE_DIRECTORY_REQ CREATE_DIRECTORY_REQ QUERY_INFORMATION_REQ SETATTR_REQ TRANSACT_IOCTL_REQ TRANSACT_CHANGE_NOTIFY_REQ TRANSACTION2_QPI_REQ TRANSACTION2_SPI_REQ TRANSACTION2_FFIRST_REQ TRANSACTION2_GET_DFS_REFER_REQ FILE_UNIX_LINK_INFO FILE_DIRECTORY_INFO FILE_FULL_DIRECTORY_INFO SEARCH_ID_FULL_DIR_INFO FILE_BOTH_DIRECTORY_INFO FIND_FILE_STANDARD_INFO Replace the trailing 1-element array with a flexible array, but leave the existing structure padding: FILE_ALL_INFO FILE_UNIX_INFO Remove unused structures: struct gea struct gealist Adjust all related size calculations to match the changes to sizeof(). No machine code output differences are produced after these changes. [1] For lots of details, see both: https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays https://people.kernel.org/kees/bounded-flexible-arrays-in-c Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Kees Cook Signed-off-by: Steve French [ Salvatore Bonaccorso: Patch does not apply cleanly only due to a whitespace difference in fs/smb/client/cifspdu.h . Fixed up manually. ] Signed-off-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_spnego.h | 2 +- fs/smb/client/cifspdu.h | 96 ++++++++++++++++++------------------- fs/smb/client/readdir.c | 6 +-- fs/smb/client/smb2pdu.c | 4 +- fs/smb/client/smb2pdu.h | 2 +- 5 files changed, 53 insertions(+), 57 deletions(-) diff --git a/fs/smb/client/cifs_spnego.h b/fs/smb/client/cifs_spnego.h index 7f102ffeb6750..e4d751b0c8127 100644 --- a/fs/smb/client/cifs_spnego.h +++ b/fs/smb/client/cifs_spnego.h @@ -24,7 +24,7 @@ struct cifs_spnego_msg { uint32_t flags; uint32_t sesskey_len; uint32_t secblob_len; - uint8_t data[1]; + uint8_t data[]; }; #ifdef __KERNEL__ diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 97bb1838555b4..94f86374f4b71 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -562,7 +562,7 @@ typedef union smb_com_session_setup_andx { __u32 Reserved; __le32 Capabilities; /* see below */ __le16 ByteCount; - unsigned char SecurityBlob[1]; /* followed by */ + unsigned char SecurityBlob[]; /* followed by */ /* STRING NativeOS */ /* STRING NativeLanMan */ } __attribute__((packed)) req; /* NTLM request format (with @@ -582,7 +582,7 @@ typedef union smb_com_session_setup_andx { __u32 Reserved; /* see below */ __le32 Capabilities; __le16 ByteCount; - unsigned char CaseInsensitivePassword[1]; /* followed by: */ + unsigned char CaseInsensitivePassword[]; /* followed by: */ /* unsigned char * CaseSensitivePassword; */ /* STRING AccountName */ /* STRING PrimaryDomain */ @@ -599,7 +599,7 @@ typedef union smb_com_session_setup_andx { __le16 Action; /* see below */ __le16 SecurityBlobLength; __u16 ByteCount; - unsigned char SecurityBlob[1]; /* followed by */ + unsigned char SecurityBlob[]; /* followed by */ /* unsigned char * NativeOS; */ /* unsigned char * NativeLanMan; */ /* unsigned char * PrimaryDomain; */ @@ -618,7 +618,7 @@ typedef union smb_com_session_setup_andx { __le16 PasswordLength; __u32 Reserved; /* encrypt key len and offset */ __le16 ByteCount; - unsigned char AccountPassword[1]; /* followed by */ + unsigned char AccountPassword[]; /* followed by */ /* STRING AccountName */ /* STRING PrimaryDomain */ /* STRING NativeOS */ @@ -632,7 +632,7 @@ typedef union smb_com_session_setup_andx { __le16 AndXOffset; __le16 Action; /* see below */ __u16 ByteCount; - unsigned char NativeOS[1]; /* followed by */ + unsigned char NativeOS[]; /* followed by */ /* unsigned char * NativeLanMan; */ /* unsigned char * PrimaryDomain; */ } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */ @@ -693,7 +693,7 @@ typedef struct smb_com_tconx_req { __le16 Flags; /* see below */ __le16 PasswordLength; __le16 ByteCount; - unsigned char Password[1]; /* followed by */ + unsigned char Password[]; /* followed by */ /* STRING Path *//* \\server\share name */ /* STRING Service */ } __attribute__((packed)) TCONX_REQ; @@ -705,7 +705,7 @@ typedef struct smb_com_tconx_rsp { __le16 AndXOffset; __le16 OptionalSupport; /* see below */ __u16 ByteCount; - unsigned char Service[1]; /* always ASCII, not Unicode */ + unsigned char Service[]; /* always ASCII, not Unicode */ /* STRING NativeFileSystem */ } __attribute__((packed)) TCONX_RSP; @@ -718,7 +718,7 @@ typedef struct smb_com_tconx_rsp_ext { __le32 MaximalShareAccessRights; __le32 GuestMaximalShareAccessRights; __u16 ByteCount; - unsigned char Service[1]; /* always ASCII, not Unicode */ + unsigned char Service[]; /* always ASCII, not Unicode */ /* STRING NativeFileSystem */ } __attribute__((packed)) TCONX_RSP_EXT; @@ -755,14 +755,14 @@ typedef struct smb_com_echo_req { struct smb_hdr hdr; __le16 EchoCount; __le16 ByteCount; - char Data[1]; + char Data[]; } __attribute__((packed)) ECHO_REQ; typedef struct smb_com_echo_rsp { struct smb_hdr hdr; __le16 SequenceNumber; __le16 ByteCount; - char Data[1]; + char Data[]; } __attribute__((packed)) ECHO_RSP; typedef struct smb_com_logoff_andx_req { @@ -862,7 +862,7 @@ typedef struct smb_com_open_req { /* also handles create */ __le32 ImpersonationLevel; __u8 SecurityFlags; __le16 ByteCount; - char fileName[1]; + char fileName[]; } __attribute__((packed)) OPEN_REQ; /* open response: oplock levels */ @@ -939,7 +939,7 @@ typedef struct smb_com_openx_req { __le32 Timeout; __le32 Reserved; __le16 ByteCount; /* file name follows */ - char fileName[1]; + char fileName[]; } __attribute__((packed)) OPENX_REQ; typedef struct smb_com_openx_rsp { @@ -1087,7 +1087,7 @@ typedef struct smb_com_lock_req { __le16 NumberOfUnlocks; __le16 NumberOfLocks; __le16 ByteCount; - LOCKING_ANDX_RANGE Locks[1]; + LOCKING_ANDX_RANGE Locks[]; } __attribute__((packed)) LOCK_REQ; /* lock type */ @@ -1116,7 +1116,7 @@ typedef struct smb_com_rename_req { __le16 SearchAttributes; /* target file attributes */ __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII or Unicode */ - unsigned char OldFileName[1]; + unsigned char OldFileName[]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName */ } __attribute__((packed)) RENAME_REQ; @@ -1136,7 +1136,7 @@ typedef struct smb_com_copy_req { __le16 Flags; __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII or Unicode */ - unsigned char OldFileName[1]; + unsigned char OldFileName[]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName string */ } __attribute__((packed)) COPY_REQ; @@ -1146,7 +1146,7 @@ typedef struct smb_com_copy_rsp { __le16 CopyCount; /* number of files copied */ __u16 ByteCount; /* may be zero */ __u8 BufferFormat; /* 0x04 - only present if errored file follows */ - unsigned char ErrorFileName[1]; /* only present if error in copy */ + unsigned char ErrorFileName[]; /* only present if error in copy */ } __attribute__((packed)) COPY_RSP; #define CREATE_HARD_LINK 0x103 @@ -1160,7 +1160,7 @@ typedef struct smb_com_nt_rename_req { /* A5 - also used for create hardlink */ __le32 ClusterCount; __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII or Unicode */ - unsigned char OldFileName[1]; + unsigned char OldFileName[]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName */ } __attribute__((packed)) NT_RENAME_REQ; @@ -1175,7 +1175,7 @@ typedef struct smb_com_delete_file_req { __le16 SearchAttributes; __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char fileName[1]; + unsigned char fileName[]; } __attribute__((packed)) DELETE_FILE_REQ; typedef struct smb_com_delete_file_rsp { @@ -1187,7 +1187,7 @@ typedef struct smb_com_delete_directory_req { struct smb_hdr hdr; /* wct = 0 */ __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char DirName[1]; + unsigned char DirName[]; } __attribute__((packed)) DELETE_DIRECTORY_REQ; typedef struct smb_com_delete_directory_rsp { @@ -1199,7 +1199,7 @@ typedef struct smb_com_create_directory_req { struct smb_hdr hdr; /* wct = 0 */ __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char DirName[1]; + unsigned char DirName[]; } __attribute__((packed)) CREATE_DIRECTORY_REQ; typedef struct smb_com_create_directory_rsp { @@ -1211,7 +1211,7 @@ typedef struct smb_com_query_information_req { struct smb_hdr hdr; /* wct = 0 */ __le16 ByteCount; /* 1 + namelen + 1 */ __u8 BufferFormat; /* 4 = ASCII */ - unsigned char FileName[1]; + unsigned char FileName[]; } __attribute__((packed)) QUERY_INFORMATION_REQ; typedef struct smb_com_query_information_rsp { @@ -1231,7 +1231,7 @@ typedef struct smb_com_setattr_req { __le16 reserved[5]; /* must be zero */ __u16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char fileName[1]; + unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; typedef struct smb_com_setattr_rsp { @@ -1313,7 +1313,7 @@ typedef struct smb_com_transaction_ioctl_req { __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */ __le16 ByteCount; __u8 Pad[3]; - __u8 Data[1]; + __u8 Data[]; } __attribute__((packed)) TRANSACT_IOCTL_REQ; typedef struct smb_com_transaction_compr_ioctl_req { @@ -1431,8 +1431,8 @@ typedef struct smb_com_transaction_change_notify_req { __u8 WatchTree; /* 1 = Monitor subdirectories */ __u8 Reserved2; __le16 ByteCount; -/* __u8 Pad[3];*/ -/* __u8 Data[1];*/ +/* __u8 Pad[3];*/ +/* __u8 Data[];*/ } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ; /* BB eventually change to use generic ntransact rsp struct @@ -1521,7 +1521,7 @@ struct cifs_quota_data { __u64 space_used; __u64 soft_limit; __u64 hard_limit; - char sid[1]; /* variable size? */ + char sid[]; /* variable size? */ } __attribute__((packed)); /* quota sub commands */ @@ -1673,7 +1673,7 @@ typedef struct smb_com_transaction2_qpi_req { __u8 Pad; __le16 InformationLevel; __u32 Reserved4; - char FileName[1]; + char FileName[]; } __attribute__((packed)) TRANSACTION2_QPI_REQ; typedef struct smb_com_transaction2_qpi_rsp { @@ -1706,7 +1706,7 @@ typedef struct smb_com_transaction2_spi_req { __u16 Pad1; __le16 InformationLevel; __u32 Reserved4; - char FileName[1]; + char FileName[]; } __attribute__((packed)) TRANSACTION2_SPI_REQ; typedef struct smb_com_transaction2_spi_rsp { @@ -1813,7 +1813,7 @@ typedef struct smb_com_transaction2_ffirst_req { __le16 SearchFlags; __le16 InformationLevel; __le32 SearchStorageType; - char FileName[1]; + char FileName[]; } __attribute__((packed)) TRANSACTION2_FFIRST_REQ; typedef struct smb_com_transaction2_ffirst_rsp { @@ -2024,7 +2024,7 @@ typedef struct smb_com_transaction2_get_dfs_refer_req { perhaps?) followed by one byte pad - doesn't seem to matter though */ __le16 MaxReferralLevel; - char RequestFileName[1]; + char RequestFileName[]; } __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_REQ; #define DFS_VERSION cpu_to_le16(0x0003) @@ -2053,7 +2053,7 @@ struct get_dfs_referral_rsp { __le16 PathConsumed; __le16 NumberOfReferrals; __le32 DFSFlags; - REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */ + REFERRAL3 referrals[]; /* array of level 3 dfs_referral structures */ /* followed by the strings pointed to by the referral structures */ } __packed; @@ -2292,7 +2292,10 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ __le32 Mode; __le32 AlignmentRequirement; __le32 FileNameLength; - char FileName[1]; + union { + char __pad; + DECLARE_FLEX_ARRAY(char, FileName); + }; } __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */ typedef struct { @@ -2330,7 +2333,7 @@ typedef struct { } __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ typedef struct { - char LinkDest[1]; + DECLARE_FLEX_ARRAY(char, LinkDest); } __attribute__((packed)) FILE_UNIX_LINK_INFO; /* level 0x201 QPathInfo */ /* The following three structures are needed only for @@ -2380,7 +2383,7 @@ struct file_end_of_file_info { } __attribute__((packed)); /* size info, level 0x104 for set, 0x106 for query */ struct file_alt_name_info { - __u8 alt_name[1]; + DECLARE_FLEX_ARRAY(__u8, alt_name); } __attribute__((packed)); /* level 0x0108 */ struct file_stream_info { @@ -2490,7 +2493,10 @@ typedef struct { __le32 NextEntryOffset; __u32 ResumeKey; /* as with FileIndex - no need to convert */ FILE_UNIX_BASIC_INFO basic; - char FileName[1]; + union { + char __pad; + DECLARE_FLEX_ARRAY(char, FileName); + }; } __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */ typedef struct { @@ -2504,7 +2510,7 @@ typedef struct { __le64 AllocationSize; __le32 ExtFileAttributes; __le32 FileNameLength; - char FileName[1]; + char FileName[]; } __attribute__((packed)) FILE_DIRECTORY_INFO; /* level 0x101 FF resp data */ typedef struct { @@ -2519,7 +2525,7 @@ typedef struct { __le32 ExtFileAttributes; __le32 FileNameLength; __le32 EaSize; /* length of the xattrs */ - char FileName[1]; + char FileName[]; } __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 rsp data */ typedef struct { @@ -2536,7 +2542,7 @@ typedef struct { __le32 EaSize; /* EA size */ __le32 Reserved; __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ - char FileName[1]; + char FileName[]; } __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */ typedef struct { @@ -2554,7 +2560,7 @@ typedef struct { __u8 ShortNameLength; __u8 Reserved; __u8 ShortName[24]; - char FileName[1]; + char FileName[]; } __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */ typedef struct { @@ -2569,7 +2575,7 @@ typedef struct { __le32 AllocationSize; __le16 Attributes; /* verify not u32 */ __u8 FileNameLength; - char FileName[1]; + char FileName[]; } __attribute__((packed)) FIND_FILE_STANDARD_INFO; /* level 0x1 FF resp data */ @@ -2579,16 +2585,6 @@ struct win_dev { __le64 minor; } __attribute__((packed)); -struct gea { - unsigned char name_len; - char name[1]; -} __attribute__((packed)); - -struct gealist { - unsigned long list_len; - struct gea list[1]; -} __attribute__((packed)); - struct fea { unsigned char EA_flags; __u8 name_len; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 5990bdbae598f..9a1f1913fb592 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -497,7 +497,7 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) FIND_FILE_STANDARD_INFO *pfData; pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo; - new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) + + new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) + 1 + pfData->FileNameLength; } else { u32 next_offset = le32_to_cpu(pDirInfo->NextEntryOffset); @@ -515,9 +515,9 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) new_entry, end_of_smb, old_entry); return NULL; } else if (((level == SMB_FIND_FILE_INFO_STANDARD) && - (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) + (new_entry + sizeof(FIND_FILE_STANDARD_INFO) + 1 > end_of_smb)) || ((level != SMB_FIND_FILE_INFO_STANDARD) && - (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { + (new_entry + sizeof(FILE_DIRECTORY_INFO) + 1 > end_of_smb))) { cifs_dbg(VFS, "search entry %p extends after end of SMB %p\n", new_entry, end_of_smb); return NULL; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index cc425a616899a..e15bf116c7558 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5073,10 +5073,10 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, switch (srch_inf->info_level) { case SMB_FIND_FILE_DIRECTORY_INFO: - info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1; + info_buf_size = sizeof(FILE_DIRECTORY_INFO); break; case SMB_FIND_FILE_ID_FULL_DIR_INFO: - info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1; + info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO); break; case SMB_FIND_FILE_POSIX_INFO: /* note that posix payload are variable size */ diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 8d011fedecd03..3a13b9b564520 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -373,7 +373,7 @@ struct smb2_file_id_extd_directory_info { __le32 EaSize; /* EA size */ __le32 ReparsePointTag; /* valid if FILE_ATTR_REPARSE_POINT set in FileAttributes */ __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit */ - char FileName[1]; + char FileName[]; } __packed; /* level 60 */ extern char smb2_padding[7]; -- GitLab From 34410fcad91b3f1f70d444e74fb9d7e3cd63d088 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2024 12:53:46 +0200 Subject: [PATCH 1948/2290] Revert "crypto: api - Disallow identical driver names" This reverts commit 680eb0a99336f7b21ff149bc57579d059421c5de which is commit 27016f75f5ed47e2d8e0ca75a8ff1f40bc1a5e27 upstream. It is reported to cause problems in older kernels due to some crypto drivers having the same name, so revert it here to fix the problems. Link: https://lore.kernel.org/r/aceda6e2-cefb-4146-aef8-ff4bafa56e56@roeck-us.net Reported-by: Guenter Roeck Cc: Ovidiu Panait Cc: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index c73d1359b9d41..5dc9ccdd5a510 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -290,7 +290,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) } if (!strcmp(q->cra_driver_name, alg->cra_name) || - !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } -- GitLab From 539a2b995a4ed93125cb0efae0f793b00ab2158b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 3 Apr 2024 08:43:12 -0700 Subject: [PATCH 1949/2290] virtio_net: Do not send RSS key if it is not supported commit 059a49aa2e25c58f90b50151f109dd3c4cdb3a47 upstream. There is a bug when setting the RSS options in virtio_net that can break the whole machine, getting the kernel into an infinite loop. Running the following command in any QEMU virtual machine with virtionet will reproduce this problem: # ethtool -X eth0 hfunc toeplitz This is how the problem happens: 1) ethtool_set_rxfh() calls virtnet_set_rxfh() 2) virtnet_set_rxfh() calls virtnet_commit_rss_command() 3) virtnet_commit_rss_command() populates 4 entries for the rss scatter-gather 4) Since the command above does not have a key, then the last scatter-gatter entry will be zeroed, since rss_key_size == 0. sg_buf_size = vi->rss_key_size; 5) This buffer is passed to qemu, but qemu is not happy with a buffer with zero length, and do the following in virtqueue_map_desc() (QEMU function): if (!sz) { virtio_error(vdev, "virtio: zero sized buffers are not allowed"); 6) virtio_error() (also QEMU function) set the device as broken vdev->broken = true; 7) Qemu bails out, and do not repond this crazy kernel. 8) The kernel is waiting for the response to come back (function virtnet_send_command()) 9) The kernel is waiting doing the following : while (!virtqueue_get_buf(vi->cvq, &tmp) && !virtqueue_is_broken(vi->cvq)) cpu_relax(); 10) None of the following functions above is true, thus, the kernel loops here forever. Keeping in mind that virtqueue_is_broken() does not look at the qemu `vdev->broken`, so, it never realizes that the vitio is broken at QEMU side. Fix it by not sending RSS commands if the feature is not available in the device. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Cc: stable@vger.kernel.org Cc: qemu-devel@nongnu.org Signed-off-by: Breno Leitao Reviewed-by: Heng Qi Reviewed-by: Xuan Zhuo Signed-off-by: David S. Miller Signed-off-by: Konstantin Ovsepian Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 45f1a871b7da8..32cddb633793d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2948,19 +2948,35 @@ static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfu static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct virtnet_info *vi = netdev_priv(dev); + bool update = false; int i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; if (indir) { + if (!vi->has_rss) + return -EOPNOTSUPP; + for (i = 0; i < vi->rss_indir_table_size; ++i) vi->ctrl->rss.indirection_table[i] = indir[i]; + update = true; } - if (key) + + if (key) { + /* If either _F_HASH_REPORT or _F_RSS are negotiated, the + * device provides hash calculation capabilities, that is, + * hash_key is configured. + */ + if (!vi->has_rss && !vi->has_rss_hash_report) + return -EOPNOTSUPP; + memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); + update = true; + } - virtnet_commit_rss_command(vi); + if (update) + virtnet_commit_rss_command(vi); return 0; } @@ -3852,13 +3868,15 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; - if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { vi->has_rss = true; - if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); + } + + if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); -- GitLab From 0c42f7e039aba3de6d7dbf92da708e2b2ecba557 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 10 Apr 2024 17:14:41 +0800 Subject: [PATCH 1950/2290] fork: defer linking file vma until vma is fully initialized commit 35e351780fa9d8240dd6f7e4f245f9ea37e96c19 upstream. Thorvald reported a WARNING [1]. And the root cause is below race: CPU 1 CPU 2 fork hugetlbfs_fallocate dup_mmap hugetlbfs_punch_hole i_mmap_lock_write(mapping); vma_interval_tree_insert_after -- Child vma is visible through i_mmap tree. i_mmap_unlock_write(mapping); hugetlb_dup_vma_private -- Clear vma_lock outside i_mmap_rwsem! i_mmap_lock_write(mapping); hugetlb_vmdelete_list vma_interval_tree_foreach hugetlb_vma_trylock_write -- Vma_lock is cleared. tmp->vm_ops->open -- Alloc new vma_lock outside i_mmap_rwsem! hugetlb_vma_unlock_write -- Vma_lock is assigned!!! i_mmap_unlock_write(mapping); hugetlb_dup_vma_private() and hugetlb_vm_op_open() are called outside i_mmap_rwsem lock while vma lock can be used in the same time. Fix this by deferring linking file vma until vma is fully initialized. Those vmas should be initialized first before they can be used. Link: https://lkml.kernel.org/r/20240410091441.3539905-1-linmiaohe@huawei.com Fixes: 8d9bfb260814 ("hugetlb: add vma based lock for pmd sharing") Signed-off-by: Miaohe Lin Reported-by: Thorvald Natvig Closes: https://lore.kernel.org/linux-mm/20240129161735.6gmjsswx62o4pbja@revolver/T/ [1] Reviewed-by: Jane Chu Cc: Christian Brauner Cc: Heiko Carstens Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Muchun Song Cc: Oleg Nesterov Cc: Peng Zhang Cc: Tycho Andersen Cc: Signed-off-by: Andrew Morton Signed-off-by: Miaohe Lin Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 85617928041cf..7e9a5919299b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -662,6 +662,15 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); + /* + * Copy/update hugetlb private vma information. + */ + if (is_vm_hugetlb_page(tmp)) + hugetlb_dup_vma_private(tmp); + + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + file = tmp->vm_file; if (file) { struct address_space *mapping = file->f_mapping; @@ -678,12 +687,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, i_mmap_unlock_write(mapping); } - /* - * Copy/update hugetlb private vma information. - */ - if (is_vm_hugetlb_page(tmp)) - hugetlb_dup_vma_private(tmp); - /* Link the vma into the MT */ mas.index = tmp->vm_start; mas.last = tmp->vm_end - 1; @@ -695,9 +698,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(tmp, mpnt); - if (tmp->vm_ops && tmp->vm_ops->open) - tmp->vm_ops->open(tmp); - if (retval) goto loop_out; } -- GitLab From 6a190e7ca4e503635b9399e3c448e93c7ca1f75b Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Sun, 21 Apr 2024 21:17:28 +0200 Subject: [PATCH 1951/2290] x86/cpu: Fix check for RDPKRU in __show_regs() commit b53c6bd5d271d023857174b8fd3e32f98ae51372 upstream. cpu_feature_enabled(X86_FEATURE_OSPKE) does not necessarily reflect whether CR4.PKE is set on the CPU. In particular, they may differ on non-BSP CPUs before setup_pku() is executed. In this scenario, RDPKRU will #UD causing the system to hang. Fix by checking CR4 for PKE enablement which is always correct for the current CPU. The scenario happens by inserting a WARN* before setup_pku() in identiy_cpu() or some other diagnostic which would lead to calling __show_regs(). [ bp: Massage commit message. ] Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240421191728.32239-1-bp@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7f94dbbc397b7..a0d3059bee3de 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -137,7 +137,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + if (cr4 & X86_CR4_PKE) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } -- GitLab From 9f882077f5180a280e96ddf423f2814f90c4f7bf Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 8 Mar 2024 09:36:31 +0000 Subject: [PATCH 1952/2290] rust: don't select CONSTRUCTORS commit 7d49f53af4b988b188d3932deac2c9c80fd7d9ce upstream. This was originally part of commit 4b9a68f2e59a0 ("rust: add support for static synchronisation primitives") from the old Rust branch, which used module constructors to initialize globals containing various synchronisation primitives with pin-init. That commit has never been upstreamed, but the `select CONSTRUCTORS` statement ended up being included in the patch that initially added Rust support to the Linux Kernel. We are not using module constructors, so let's remove the select. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Link: https://lore.kernel.org/r/20240308-constructors-v1-1-4c811342391c@google.com Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index b63dce6706c5c..ffda34c424564 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1924,7 +1924,6 @@ config RUST depends on !GCC_PLUGINS depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE - select CONSTRUCTORS help Enables Rust support in the kernel. -- GitLab From 4e6cd21498c2d81b213c78a8e14db31400d9a900 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 4 Apr 2024 15:17:02 +0100 Subject: [PATCH 1953/2290] rust: make mutually exclusive with CFI_CLANG commit 8933cf4651e02853ca679be7b2d978dfcdcc5e0c upstream. On RISC-V and arm64, and presumably x86, if CFI_CLANG is enabled, loading a rust module will trigger a kernel panic. Support for sanitisers, including kcfi (CFI_CLANG), is in the works, but for now they're nightly-only options in rustc. Make RUST depend on !CFI_CLANG to prevent configuring a kernel without symmetrical support for kfi. [ Matthew Maurer writes [1]: This patch is fine by me - the last patch needed for KCFI to be functional in Rust just landed upstream last night, so we should revisit this (in the form of enabling it) once we move to `rustc-1.79.0` or later. Ramon de C Valle also gave feedback [2] on the status of KCFI for Rust and created a tracking issue [3] in upstream Rust. - Miguel ] Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Cc: stable@vger.kernel.org Signed-off-by: Conor Dooley Acked-by: Nathan Chancellor Link: https://lore.kernel.org/rust-for-linux/CAGSQo024u1gHJgzsO38Xg3c4or+JupoPABQx_+0BLEpPg0cOEA@mail.gmail.com/ [1] Link: https://lore.kernel.org/rust-for-linux/CAOcBZOS2kPyH0Dm7Fuh4GC3=v7nZhyzBj_-dKu3PfAnrHZvaxg@mail.gmail.com/ [2] Link: https://github.com/rust-lang/rust/issues/123479 [3] Link: https://lore.kernel.org/r/20240404-providing-emporium-e652e359c711@spud [ Added feedback from the list, links, and used Cc for the tag. ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index ffda34c424564..537f01eba2e6f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1920,6 +1920,7 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE + depends on !CFI_CLANG depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT -- GitLab From c7882362897ba1d8248e6da07bea058550336b71 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 1 Apr 2024 11:24:17 -0700 Subject: [PATCH 1954/2290] Bluetooth: Fix type of len in {l2cap,sco}_sock_getsockopt_old() commit 9bf4e919ccad613b3596eebf1ff37b05b6405307 upstream. After an innocuous optimization change in LLVM main (19.0.0), x86_64 allmodconfig (which enables CONFIG_KCSAN / -fsanitize=thread) fails to build due to the checks in check_copy_size(): In file included from net/bluetooth/sco.c:27: In file included from include/linux/module.h:13: In file included from include/linux/stat.h:19: In file included from include/linux/time.h:60: In file included from include/linux/time32.h:13: In file included from include/linux/timex.h:67: In file included from arch/x86/include/asm/timex.h:6: In file included from arch/x86/include/asm/tsc.h:10: In file included from arch/x86/include/asm/msr.h:15: In file included from include/linux/percpu.h:7: In file included from include/linux/smp.h:118: include/linux/thread_info.h:244:4: error: call to '__bad_copy_from' declared with 'error' attribute: copy source size is too small 244 | __bad_copy_from(); | ^ The same exact error occurs in l2cap_sock.c. The copy_to_user() statements that are failing come from l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This does not occur with GCC with or without KCSAN or Clang without KCSAN enabled. len is defined as an 'int' because it is assigned from '__user int *optlen'. However, it is clamped against the result of sizeof(), which has a type of 'size_t' ('unsigned long' for 64-bit platforms). This is done with min_t() because min() requires compatible types, which results in both len and the result of sizeof() being casted to 'unsigned int', meaning len changes signs and the result of sizeof() is truncated. From there, len is passed to copy_to_user(), which has a third parameter type of 'unsigned long', so it is widened and changes signs again. This excessive casting in combination with the KCSAN instrumentation causes LLVM to fail to eliminate the __bad_copy_from() call, failing the build. The official recommendation from LLVM developers is to consistently use long types for all size variables to avoid the unnecessary casting in the first place. Change the type of len to size_t in both l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This clears up the error while allowing min_t() to be replaced with min(), resulting in simpler code with no casts and fewer implicit conversions. While len is a different type than optlen now, it should result in no functional change because the result of sizeof() will clamp all values of optlen in the same manner as before. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2007 Link: https://github.com/llvm/llvm-project/issues/85647 Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 7 ++++--- net/bluetooth/sco.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4198ca66fbe10..e3c7029ec8a61 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -457,7 +457,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; struct l2cap_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -504,7 +505,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mode 0x%2.2x", chan->mode); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -554,7 +555,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 2e9137c539a49..4a6bf60f3e7aa 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -971,7 +971,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct sco_options opts; struct sco_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; BT_DBG("sk %p", sk); @@ -993,7 +994,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mtu %u", opts.mtu); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *)&opts, len)) err = -EFAULT; @@ -1011,7 +1012,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *)&cinfo, len)) err = -EFAULT; -- GitLab From e6dd0117e947da0847c469a12ac9e11512d13be2 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 29 Mar 2024 10:34:39 +0800 Subject: [PATCH 1955/2290] Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0bda:0x4853 commit d1a5a7eede2977da3d2002d5ea3b519019cc1a98 upstream. Add the support ID(0x0bda, 0x4853) to usb_device_id table for Realtek RTL8852BE. Without this change the device utilizes an obsolete version of the firmware that is encoded in it rather than the updated Realtek firmware and config files from the firmware directory. The latter files implement many new features. The device table is as follows: T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=4853 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: stable@vger.kernel.org Signed-off-by: Larry Finger Signed-off-by: WangYuli Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 954f7f3b5cc30..6a772b955d69d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -535,6 +535,8 @@ static const struct usb_device_id blacklist_table[] = { /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | -- GitLab From e60502b907be350c518819297b565007a94c706d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Apr 2024 15:57:47 +0200 Subject: [PATCH 1956/2290] Bluetooth: qca: fix NULL-deref on non-serdev suspend commit 73e87c0a49fda31d7b589edccf4c72e924411371 upstream. Qualcomm ROME controllers can be registered from the Bluetooth line discipline and in this case the HCI UART serdev pointer is NULL. Add the missing sanity check to prevent a NULL-pointer dereference when wakeup() is called for a non-serdev controller during suspend. Just return true for now to restore the original behaviour and address the crash with pre-6.2 kernels, which do not have commit e9b3e5b8c657 ("Bluetooth: hci_qca: only assign wakeup with serial port support") that causes the crash to happen already at setup() time. Fixes: c1a74160eaf1 ("Bluetooth: hci_qca: Add device_may_wakeup support") Cc: stable@vger.kernel.org # 5.13 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca6065297a7b2..179278b801eb3 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1645,6 +1645,9 @@ static bool qca_wakeup(struct hci_dev *hdev) struct hci_uart *hu = hci_get_drvdata(hdev); bool wakeup; + if (!hu->serdev) + return true; + /* BT SoC attached through the serial bus is handled by the serdev driver. * So we need to use the device handle of the serdev driver to get the * status of device may wakeup. -- GitLab From a957ea5aa3d3518067a1ba32c6127322ad348d20 Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Thu, 21 Mar 2024 14:30:01 +0000 Subject: [PATCH 1957/2290] mmc: sdhci-msm: pervent access to suspended controller commit f8def10f73a516b771051a2f70f2f0446902cb4f upstream. Generic sdhci code registers LED device and uses host->runtime_suspended flag to protect access to it. The sdhci-msm driver doesn't set this flag, which causes a crash when LED is accessed while controller is runtime suspended. Fix this by setting the flag correctly. Cc: stable@vger.kernel.org Fixes: 67e6db113c90 ("mmc: sdhci-msm: Add pm_runtime and system PM support") Signed-off-by: Mantas Pucka Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240321-sdhci-mmc-suspend-v1-1-fbc555a64400@8devices.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-msm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index a5ab2af3e5201..e37fb25577c0f 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2831,6 +2831,11 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = true; + spin_unlock_irqrestore(&host->lock, flags); /* Drop the performance vote */ dev_pm_opp_set_rate(dev, 0); @@ -2845,6 +2850,7 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; int ret; ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), @@ -2863,7 +2869,15 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) dev_pm_opp_set_rate(dev, msm_host->clk_rate); - return sdhci_msm_ice_resume(msm_host); + ret = sdhci_msm_ice_resume(msm_host); + if (ret) + return ret; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = false; + spin_unlock_irqrestore(&host->lock, flags); + + return ret; } static const struct dev_pm_ops sdhci_msm_pm_ops = { -- GitLab From 2b8bf690e05c5ef134aa9709b0a622f01e6eb337 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 23 Apr 2024 20:41:22 -0600 Subject: [PATCH 1958/2290] smb: client: Fix struct_group() usage in __packed structs commit 9a1f1d04f63c59550a5364858b46eeffdf03e8d6 upstream. Use struct_group_attr() in __packed structs, instead of struct_group(). Below you can see the pahole output before/after changes: pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ }; /* 0 56 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } network_open_info; /* 0 56 */ }; /* 0 56 */ __le32 Reserved; /* 56 4 */ /* size: 60, cachelines: 1, members: 2 */ /* last cacheline: 60 bytes */ } __attribute__((__packed__)); pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)); /* 0 52 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)) network_open_info; /* 0 52 */ }; /* 0 52 */ __le32 Reserved; /* 52 4 */ /* size: 56, cachelines: 1, members: 2 */ /* last cacheline: 56 bytes */ }; pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ }; /* 48 40 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } common_attributes; /* 48 40 */ }; /* 48 40 */ ... /* size: 111, cachelines: 2, members: 14 */ /* last cacheline: 47 bytes */ } __attribute__((__packed__)); pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)); /* 48 36 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)) common_attributes; /* 48 36 */ }; /* 48 36 */ ... /* size: 107, cachelines: 2, members: 14 */ /* last cacheline: 43 bytes */ } __attribute__((__packed__)); pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ }; /* 0 40 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } common_attributes; /* 0 40 */ }; /* 0 40 */ ... /* size: 113, cachelines: 2, members: 17 */ /* last cacheline: 49 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)); /* 0 36 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)) common_attributes; /* 0 36 */ }; /* 0 36 */ ... /* size: 109, cachelines: 2, members: 17 */ /* last cacheline: 45 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Reviewed-by: Namjae Jeon Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifspdu.h | 4 ++-- fs/smb/client/smb2pdu.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 94f86374f4b71..9cb4577063344 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; @@ -2270,7 +2270,7 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 3a13b9b564520..2823526b66f7e 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -339,7 +339,7 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; -- GitLab From c7a4bca289e50bb4b2650f845c41bb3e453f4c66 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 12:49:50 -0500 Subject: [PATCH 1959/2290] smb3: fix lock ordering potential deadlock in cifs_sync_mid_result commit 8861fd5180476f45f9e8853db154600469a0284f upstream. Coverity spotted that the cifs_sync_mid_result function could deadlock "Thread deadlock (ORDER_REVERSAL) lock_order: Calling spin_lock acquires lock TCP_Server_Info.srv_lock while holding lock TCP_Server_Info.mid_lock" Addresses-Coverity: 1590401 ("Thread deadlock (ORDER_REVERSAL)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index df44acaec9ae9..338b34c99b2de 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -931,12 +931,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } + spin_unlock(&server->mid_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; + goto sync_mid_done; } spin_unlock(&server->mid_lock); +sync_mid_done: release_mid(mid); return rc; } -- GitLab From 0561b65fbd53d3e788c5b0222d9112ca016fd6a1 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 18 Mar 2024 11:59:02 +0100 Subject: [PATCH 1960/2290] HID: i2c-hid: remove I2C_HID_READ_PENDING flag to prevent lock-up commit 9c0f59e47a90c54d0153f8ddc0f80d7a36207d0e upstream. The flag I2C_HID_READ_PENDING is used to serialize I2C operations. However, this is not necessary, because I2C core already has its own locking for that. More importantly, this flag can cause a lock-up: if the flag is set in i2c_hid_xfer() and an interrupt happens, the interrupt handler (i2c_hid_irq) will check this flag and return immediately without doing anything, then the interrupt handler will be invoked again in an infinite loop. Since interrupt handler is an RT task, it takes over the CPU and the flag-clearing task never gets scheduled, thus we have a lock-up. Delete this unnecessary flag. Reported-and-tested-by: Eva Kurchatova Closes: https://lore.kernel.org/r/CA+eeCSPUDpUg76ZO8dszSbAGn+UHjcyv8F1J-CUPVARAzEtW9w@mail.gmail.com Fixes: 4a200c3b9a40 ("HID: i2c-hid: introduce HID over i2c specification implementation") Cc: Signed-off-by: Nam Cao Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 969f8eb086f02..0b05bb1e4410e 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -61,7 +61,6 @@ /* flags */ #define I2C_HID_STARTED 0 #define I2C_HID_RESET_PENDING 1 -#define I2C_HID_READ_PENDING 2 #define I2C_HID_PWR_ON 0x00 #define I2C_HID_PWR_SLEEP 0x01 @@ -193,15 +192,10 @@ static int i2c_hid_xfer(struct i2c_hid *ihid, msgs[n].len = recv_len; msgs[n].buf = recv_buf; n++; - - set_bit(I2C_HID_READ_PENDING, &ihid->flags); } ret = i2c_transfer(client->adapter, msgs, n); - if (recv_len) - clear_bit(I2C_HID_READ_PENDING, &ihid->flags); - if (ret != n) return ret < 0 ? ret : -EIO; @@ -569,9 +563,6 @@ static irqreturn_t i2c_hid_irq(int irq, void *dev_id) { struct i2c_hid *ihid = dev_id; - if (test_bit(I2C_HID_READ_PENDING, &ihid->flags)) - return IRQ_HANDLED; - i2c_hid_get_input(ihid); return IRQ_HANDLED; -- GitLab From 8bdbcfaf3eac42f98e5486b3d7e130fa287811f6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Apr 2024 10:45:47 +0200 Subject: [PATCH 1961/2290] btrfs: fix information leak in btrfs_ioctl_logical_to_ino() commit 2f7ef5bb4a2f3e481ef05fab946edb97c84f67cf upstream. Syzbot reported the following information leak for in btrfs_ioctl_logical_to_ino(): BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x110 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x110 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] btrfs_ioctl_logical_to_ino+0x440/0x750 fs/btrfs/ioctl.c:3499 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: __kmalloc_large_node+0x231/0x370 mm/slub.c:3921 __do_kmalloc_node mm/slub.c:3954 [inline] __kmalloc_node+0xb07/0x1060 mm/slub.c:3973 kmalloc_node include/linux/slab.h:648 [inline] kvmalloc_node+0xc0/0x2d0 mm/util.c:634 kvmalloc include/linux/slab.h:766 [inline] init_data_container+0x49/0x1e0 fs/btrfs/backref.c:2779 btrfs_ioctl_logical_to_ino+0x17c/0x750 fs/btrfs/ioctl.c:3480 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Bytes 40-65535 of 65536 are uninitialized Memory access of size 65536 starts at ffff888045a40000 This happens, because we're copying a 'struct btrfs_data_container' back to user-space. This btrfs_data_container is allocated in 'init_data_container()' via kvmalloc(), which does not zero-fill the memory. Fix this by using kvzalloc() which zeroes out the memory on allocation. CC: stable@vger.kernel.org # 4.14+ Reported-by: Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/backref.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 18cf801ab5908..23d0372e88821 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2475,20 +2475,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) size_t alloc_bytes; alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); - data = kvmalloc(alloc_bytes, GFP_KERNEL); + data = kvzalloc(alloc_bytes, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - if (total_bytes >= sizeof(*data)) { + if (total_bytes >= sizeof(*data)) data->bytes_left = total_bytes - sizeof(*data); - data->bytes_missing = 0; - } else { + else data->bytes_missing = sizeof(*data) - total_bytes; - data->bytes_left = 0; - } - - data->elem_cnt = 0; - data->elem_missed = 0; return data; } -- GitLab From 38f17d1fbb5bfb56ca1419e2d06376d57a9396f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:54 -0700 Subject: [PATCH 1962/2290] cpu: Re-enable CPU mitigations by default for !X86 architectures commit fe42754b94a42d08cf9501790afc25c4f6a5f631 upstream. Rename x86's to CPU_MITIGATIONS, define it in generic code, and force it on for all architectures exception x86. A recent commit to turn mitigations off by default if SPECULATION_MITIGATIONS=n kinda sorta missed that "cpu_mitigations" is completely generic, whereas SPECULATION_MITIGATIONS is x86-specific. Rename x86's SPECULATIVE_MITIGATIONS instead of keeping both and have it select CPU_MITIGATIONS, as having two configs for the same thing is unnecessary and confusing. This will also allow x86 to use the knob to manage mitigations that aren't strictly related to speculative execution. Use another Kconfig to communicate to common code that CPU_MITIGATIONS is already defined instead of having x86's menu depend on the common CPU_MITIGATIONS. This allows keeping a single point of contact for all of x86's mitigations, and it's not clear that other architectures *want* to allow disabling mitigations at compile-time. Fixes: f337a6a21e2f ("x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n") Closes: https://lkml.kernel.org/r/20240413115324.53303a68%40canb.auug.org.au Reported-by: Stephen Rothwell Reported-by: Michael Ellerman Reported-by: Geert Uytterhoeven Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Acked-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240420000556.2645001-2-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 8 ++++++++ arch/x86/Kconfig | 11 ++++++----- kernel/cpu.c | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index f99fd9a4ca778..e959abf969ec3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,14 @@ # source "arch/$(SRCARCH)/Kconfig" +config ARCH_CONFIGURES_CPU_MITIGATIONS + bool + +if !ARCH_CONFIGURES_CPU_MITIGATIONS +config CPU_MITIGATIONS + def_bool y +endif + menu "General architecture-dependent options" config CRASH_CORE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5f7a86f240db7..49cea5b81649d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CONFIGURES_CPU_MITIGATIONS select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 @@ -2449,17 +2450,17 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) -menuconfig SPECULATION_MITIGATIONS - bool "Mitigations for speculative execution vulnerabilities" +menuconfig CPU_MITIGATIONS + bool "Mitigations for CPU vulnerabilities" default y help - Say Y here to enable options which enable mitigations for - speculative execution hardware vulnerabilities. + Say Y here to enable options which enable mitigations for hardware + vulnerabilities (usually related to speculative execution). If you say N, all mitigations will be disabled. You really should know what you are doing to say so. -if SPECULATION_MITIGATIONS +if CPU_MITIGATIONS config PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" diff --git a/kernel/cpu.c b/kernel/cpu.c index 2c44dd12a158c..e0e09b700b430 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2788,8 +2788,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; + IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { -- GitLab From 94021d1d2b57f2b45638ad67babb29bd30518c3f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 1963/2290] LoongArch: Fix callchain parse error with kernel tracepoint events commit d3119bc985fb645ad3b2a9cf9952c1d56d9daaa3 upstream. In order to fix perf's callchain parse error for LoongArch, we implement perf_arch_fetch_caller_regs() which fills several necessary registers used for callchain unwinding, including sp, fp, and era. This is similar to the following commits. commit b3eac0265bf6: ("arm: perf: Fix callchain parse error with kernel tracepoint events") commit 5b09a094f2fb: ("arm64: perf: Fix callchain parse error with kernel tracepoint events") commit 9a7e8ec0d4cc: ("riscv: perf: Fix callchain parse error with kernel tracepoint events") Test with commands: perf record -e sched:sched_switch -g --call-graph dwarf perf report Without this patch: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 43.41% 43.41% swapper [unknown] [k] 0000000000000000 10.94% 10.94% loong-container [unknown] [k] 0000000000000000 | |--5.98%--0x12006ba38 | |--2.56%--0x12006bb84 | --2.40%--0x12006b6b8 With this patch, callchain can be parsed correctly: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 47.57% 47.57% swapper [kernel.vmlinux] [k] __schedule | ---__schedule 26.76% 26.76% loong-container [kernel.vmlinux] [k] __schedule | |--13.78%--0x12006ba38 | | | |--9.19%--__schedule | | | --4.59%--handle_syscall | do_syscall | sys_futex | do_futex | futex_wait | futex_wait_queue_me | hrtimer_start_range_ns | __schedule | |--8.38%--0x12006bb84 | handle_syscall | do_syscall | sys_epoll_pwait | do_epoll_wait | schedule_hrtimeout_range_clock | hrtimer_start_range_ns | __schedule | --4.59%--0x12006b6b8 handle_syscall do_syscall sys_nanosleep hrtimer_nanosleep do_nanosleep hrtimer_start_range_ns __schedule Cc: stable@vger.kernel.org Fixes: b37042b2bb7cd751f0 ("LoongArch: Add perf events support") Reported-by: Youling Tang Suggested-by: Youling Tang Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/include/asm/perf_event.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index 2a35a0bc2aaab..52b638059e40b 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -7,6 +7,14 @@ #ifndef __LOONGARCH_PERF_EVENT_H__ #define __LOONGARCH_PERF_EVENT_H__ +#include + #define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->csr_era = (__ip); \ + (regs)->regs[3] = current_stack_pointer; \ + (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \ +} + #endif /* __LOONGARCH_PERF_EVENT_H__ */ -- GitLab From ba9bcc0e58f3e51dd0e2e018bae221d99d53c737 Mon Sep 17 00:00:00 2001 From: Jiantao Shan Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 1964/2290] LoongArch: Fix access error when read fault on a write-only VMA commit efb44ff64c95340b06331fc48634b99efc9dd77c upstream. As with most architectures, allow handling of read faults in VMAs that have VM_WRITE but without VM_READ (WRITE implies READ). Otherwise, reading before writing a write-only memory will error while reading after writing everything is fine. BTW, move the VM_EXEC judgement before VM_READ/VM_WRITE to make logic a little clearer. Cc: stable@vger.kernel.org Fixes: 09cfefb7fa70c3af01 ("LoongArch: Add memory management") Signed-off-by: Jiantao Shan Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index b829ab911a17b..007718d51f095 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -193,10 +193,10 @@ good_area: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { - if (!(vma->vm_flags & VM_READ) && address != exception_era(regs)) - goto bad_area; if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs)) + goto bad_area; } /* -- GitLab From ffddf569e35ec823b6af0bde5a2a5f32dcf7b1c0 Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:42 +0100 Subject: [PATCH 1965/2290] arm64: dts: rockchip: enable internal pull-up for Q7_THRM# on RK3399 Puma commit 0ac417b8f124427c90ec8c2ef4f632b821d924cc upstream. Q7_THRM# pin is connected to a diode on the module which is used as a level shifter, and the pin have a pull-down enabled by default. We need to configure it to internal pull-up, other- wise whenever the pin is configured as INPUT and we try to control it externally the value will always remain zero. Signed-off-by: Iskander Amara Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Reviewed-by: Quentin Schulz Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240308085243.69903-1-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index a77f922107c20..937a15005eb0e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -407,6 +407,16 @@ }; &pinctrl { + pinctrl-names = "default"; + pinctrl-0 = <&q7_thermal_pin>; + + gpios { + q7_thermal_pin: q7-thermal-pin { + rockchip,pins = + <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>; + }; + }; + i2c8 { i2c8_xfer_a: i2c8-xfer { rockchip,pins = -- GitLab From 404b0ae432cc3f50c06053b36dec426b2db191a3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Apr 2024 21:20:56 -0400 Subject: [PATCH 1966/2290] drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9792b7cc18aaa0c2acae6af5d0acf249bcb1ab0d upstream. This avoids a potential conflict with firmwares with the newer HDP flush mechanism. Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 856db876af141..c7af36370b0de 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -345,17 +345,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** -- GitLab From 64f9d8ac2cd43b1df11c8fc4f9742993748e36cc Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 11:32:34 -0400 Subject: [PATCH 1967/2290] drm/amdgpu: Fix leak when GPU memory allocation fails commit 25e9227c6afd200bed6774c866980b8e36d033af upstream. Free the sync object if the memory allocation fails for any reason. Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 260e6a3316db0..7d5fbaaba72f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1779,6 +1779,7 @@ err_node_allow: err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); -- GitLab From dd681710ab77c8beafe2e263064cb1bd0e2d6ca9 Mon Sep 17 00:00:00 2001 From: Guanrui Huang Date: Thu, 18 Apr 2024 14:10:52 +0800 Subject: [PATCH 1968/2290] irqchip/gic-v3-its: Prevent double free on error commit c26591afd33adce296c022e3480dea4282b7ef91 upstream. The error handling path in its_vpe_irq_domain_alloc() causes a double free when its_vpe_init() fails after successfully allocating at least one interrupt. This happens because its_vpe_irq_domain_free() frees the interrupts along with the area bitmap and the vprop_page and its_vpe_irq_domain_alloc() subsequently frees the area bitmap and the vprop_page again. Fix this by unconditionally invoking its_vpe_irq_domain_free() which handles all cases correctly and by removing the bitmap/vprop_page freeing from its_vpe_irq_domain_alloc(). [ tglx: Massaged change log ] Fixes: 7d75bbb4bc1a ("irqchip/gic-v3-its: Add VPE irq domain allocation/teardown") Signed-off-by: Guanrui Huang Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240418061053.96803-2-guanrui.huang@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4d03fb3a82460..f9ab5cfc9b947 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4535,13 +4535,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq set_bit(i, bitmap); } - if (err) { - if (i > 0) - its_vpe_irq_domain_free(domain, virq, i); - - its_lpi_free(bitmap, base, nr_ids); - its_free_prop_table(vprop_page); - } + if (err) + its_vpe_irq_domain_free(domain, virq, i); return err; } -- GitLab From 01fc53be672acae37e611c80cc0b4f3939584de3 Mon Sep 17 00:00:00 2001 From: Jarred White Date: Fri, 1 Mar 2024 11:25:59 -0800 Subject: [PATCH 1969/2290] ACPI: CPPC: Use access_width over bit_width for system memory accesses commit 2f4a4d63a193be6fd530d180bb13c3592052904c upstream. To align with ACPI 6.3+, since bit_width can be any 8-bit value, it cannot be depended on to be always on a clean 8b boundary. This was uncovered on the Cobalt 100 platform. SError Interrupt on CPU26, code 0xbe000011 -- SError CPU: 26 PID: 1510 Comm: systemd-udevd Not tainted 5.15.2.1-13 #1 Hardware name: MICROSOFT CORPORATION, BIOS MICROSOFT CORPORATION pstate: 62400009 (nZCv daif +PAN -UAO +TCO -DIT -SSBS BTYPE=--) pc : cppc_get_perf_caps+0xec/0x410 lr : cppc_get_perf_caps+0xe8/0x410 sp : ffff8000155ab730 x29: ffff8000155ab730 x28: ffff0080139d0038 x27: ffff0080139d0078 x26: 0000000000000000 x25: ffff0080139d0058 x24: 00000000ffffffff x23: ffff0080139d0298 x22: ffff0080139d0278 x21: 0000000000000000 x20: ffff00802b251910 x19: ffff0080139d0000 x18: ffffffffffffffff x17: 0000000000000000 x16: ffffdc7e111bad04 x15: ffff00802b251008 x14: ffffffffffffffff x13: ffff013f1fd63300 x12: 0000000000000006 x11: ffffdc7e128f4420 x10: 0000000000000000 x9 : ffffdc7e111badec x8 : ffff00802b251980 x7 : 0000000000000000 x6 : ffff0080139d0028 x5 : 0000000000000000 x4 : ffff0080139d0018 x3 : 00000000ffffffff x2 : 0000000000000008 x1 : ffff8000155ab7a0 x0 : 0000000000000000 Kernel panic - not syncing: Asynchronous SError Interrupt CPU: 26 PID: 1510 Comm: systemd-udevd Not tainted 5.15.2.1-13 #1 Hardware name: MICROSOFT CORPORATION, BIOS MICROSOFT CORPORATION Call trace: dump_backtrace+0x0/0x1e0 show_stack+0x24/0x30 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 panic+0x16c/0x384 add_taint+0x0/0xc0 arm64_serror_panic+0x7c/0x90 arm64_is_fatal_ras_serror+0x34/0xa4 do_serror+0x50/0x6c el1h_64_error_handler+0x40/0x74 el1h_64_error+0x7c/0x80 cppc_get_perf_caps+0xec/0x410 cppc_cpufreq_cpu_init+0x74/0x400 [cppc_cpufreq] cpufreq_online+0x2dc/0xa30 cpufreq_add_dev+0xc0/0xd4 subsys_interface_register+0x134/0x14c cpufreq_register_driver+0x1b0/0x354 cppc_cpufreq_init+0x1a8/0x1000 [cppc_cpufreq] do_one_initcall+0x50/0x250 do_init_module+0x60/0x27c load_module+0x2300/0x2570 __do_sys_finit_module+0xa8/0x114 __arm64_sys_finit_module+0x2c/0x3c invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x180/0x1a0 do_el0_svc+0x84/0xa0 el0_svc+0x2c/0xc0 el0t_64_sync_handler+0xa4/0x12c el0t_64_sync+0x1a4/0x1a8 Instead, use access_width to determine the size and use the offset and width to shift and mask the bits to read/write out. Make sure to add a check for system memory since pcc redefines the access_width to subspace id. If access_width is not set, then fall back to using bit_width. Signed-off-by: Jarred White Reviewed-by: Easwar Hariharan Cc: 5.15+ # 5.15+ [ rjw: Subject and changelog edits, comment adjustments ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 093675b1a1ffb..c123fdbca693e 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -163,6 +163,13 @@ show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); +/* Check for valid access_width, otherwise, fallback to using bit_width */ +#define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) + +/* Shift and apply the mask for CPC reads/writes */ +#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \ + GENMASK(((reg)->bit_width), 0))) + static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -776,6 +783,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { if (gas_t->address) { void __iomem *addr; + size_t access_width; if (!osc_cpc_flexible_adr_space_confirmed) { pr_debug("Flexible address space capability not supported\n"); @@ -783,7 +791,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) goto out_free; } - addr = ioremap(gas_t->address, gas_t->bit_width/8); + access_width = GET_BIT_WIDTH(gas_t) / 8; + addr = ioremap(gas_t->address, access_width); if (!addr) goto out_free; cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr; @@ -979,6 +988,7 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) { void __iomem *vaddr = NULL; + int size; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; @@ -990,7 +1000,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = 0; if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = 8 << (reg->access_width - 1); + u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; @@ -1014,7 +1024,9 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) return acpi_os_read_memory((acpi_physical_address)reg->address, val, reg->bit_width); - switch (reg->bit_width) { + size = GET_BIT_WIDTH(reg); + + switch (size) { case 8: *val = readb_relaxed(vaddr); break; @@ -1033,18 +1045,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) return -EFAULT; } + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + *val = MASK_VAL(reg, *val); + return 0; } static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) { int ret_val = 0; + int size; void __iomem *vaddr = NULL; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = 8 << (reg->access_width - 1); + u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, @@ -1066,7 +1082,12 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) return acpi_os_write_memory((acpi_physical_address)reg->address, val, reg->bit_width); - switch (reg->bit_width) { + size = GET_BIT_WIDTH(reg); + + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + val = MASK_VAL(reg, val); + + switch (size) { case 8: writeb_relaxed(val, vaddr); break; -- GitLab From 0f708a7e0abdceaaa35dc1844020e12a1153de53 Mon Sep 17 00:00:00 2001 From: Jarred White Date: Mon, 8 Apr 2024 22:23:09 -0700 Subject: [PATCH 1970/2290] ACPI: CPPC: Fix bit_offset shift in MASK_VAL() macro commit 05d92ee782eeb7b939bdd0189e6efcab9195bf95 upstream. Commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") neglected to properly wrap the bit_offset shift when it comes to applying the mask. This may cause incorrect values to be read and may cause the cpufreq module not be loaded. [ 11.059751] cpu_capacity: CPU0 missing/invalid highest performance. [ 11.066005] cpu_capacity: partial information: fallback to 1024 for all CPUs Also, corrected the bitmask generation in GENMASK (extra bit being added). Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Jarred White Cc: 5.15+ # 5.15+ Reviewed-by: Vanshidhar Konda Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index c123fdbca693e..f153f7d3e6b7f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -167,8 +167,8 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); #define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) /* Shift and apply the mask for CPC reads/writes */ -#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \ - GENMASK(((reg)->bit_width), 0))) +#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \ + GENMASK(((reg)->bit_width) - 1, 0)) static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -- GitLab From ba234a54ee56e5b23e763551e28fc220072d19c8 Mon Sep 17 00:00:00 2001 From: Vanshidhar Konda Date: Thu, 11 Apr 2024 16:18:44 -0700 Subject: [PATCH 1971/2290] ACPI: CPPC: Fix access width used for PCC registers commit f489c948028b69cea235d9c0de1cc10eeb26a172 upstream. commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") modified cpc_read()/cpc_write() to use access_width to read CPC registers. However, for PCC registers the access width field in the ACPI register macro specifies the PCC subspace ID. For non-zero PCC subspace ID it is incorrectly treated as access width. This causes errors when reading from PCC registers in the CPPC driver. For PCC registers, base the size of read/write on the bit width field. The debug message in cpc_read()/cpc_write() is updated to print relevant information for the address space type used to read the register. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Vanshidhar Konda Tested-by: Jarred White Reviewed-by: Jarred White Reviewed-by: Easwar Hariharan Cc: 5.15+ # 5.15+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index f153f7d3e6b7f..49339f37d9405 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -998,14 +998,14 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; + size = GET_BIT_WIDTH(reg); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; status = acpi_os_read_port((acpi_io_address)reg->address, - &val_u32, width); + &val_u32, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); @@ -1014,17 +1014,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = val_u32; return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. + */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_read_ffh(cpu, reg, val); else return acpi_os_read_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); switch (size) { case 8: @@ -1040,8 +1045,13 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = readq_relaxed(vaddr); break; default: - pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", + size, pcc_ss_id); + } return -EFAULT; } @@ -1059,12 +1069,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; + size = GET_BIT_WIDTH(reg); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, - (u32)val, width); + (u32)val, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to write SystemIO port %llx\n", reg->address); @@ -1072,17 +1083,22 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) } return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. + */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_write_ffh(cpu, reg, val); else return acpi_os_write_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) val = MASK_VAL(reg, val); @@ -1101,8 +1117,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) writeq_relaxed(val, vaddr); break; default: - pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; break; } -- GitLab From 8e2c583c268003ce7241bb7cd839954273de4a5b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:03 -0700 Subject: [PATCH 1972/2290] ethernet: Add helper for assigning packet type when dest address does not match device address commit 6e159fd653d7ebf6290358e0330a0cb8a75cf73b upstream. Enable reuse of logic in eth_type_trans for determining packet type. Suggested-by: Sabrina Dubroca Cc: stable@vger.kernel.org Signed-off-by: Rahul Rameshbabu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-3-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/etherdevice.h | 25 +++++++++++++++++++++++++ net/ethernet/eth.c | 12 +----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index a541f0c4f146c..d7eef2158667d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -593,6 +593,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, eth_hw_addr_set(dev, addr); } +/** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. + */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index e02daa74e8334..5ba7b460cbf76 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) { - if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { - if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } else { - skb->pkt_type = PACKET_OTHERHOST; - } - } + eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field -- GitLab From c35fc180715d4756e231da326e5295fe8541ddeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCnster?= Date: Wed, 24 Apr 2024 15:51:52 +0200 Subject: [PATCH 1973/2290] net: b44: set pause params only when interface is up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e3eb7dd47bd4806f00e104eb6da092c435f9fb21 upstream. b44_free_rings() accesses b44::rx_buffers (and ::tx_buffers) unconditionally, but b44::rx_buffers is only valid when the device is up (they get allocated in b44_open(), and deallocated again in b44_close()), any other time these are just a NULL pointers. So if you try to change the pause params while the network interface is disabled/administratively down, everything explodes (which likely netifd tries to do). Link: https://github.com/openwrt/openwrt/issues/13789 Fixes: 1da177e4c3f4 (Linux-2.6.12-rc2) Cc: stable@vger.kernel.org Reported-by: Peter Münster Suggested-by: Jonas Gorski Signed-off-by: Vaclav Svoboda Tested-by: Peter Münster Reviewed-by: Andrew Lunn Signed-off-by: Peter Münster Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/87y192oolj.fsf@a16n.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/b44.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 7f876721596c1..5b6209f5a8017 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2033,12 +2033,14 @@ static int b44_set_pauseparam(struct net_device *dev, bp->flags |= B44_FLAG_TX_PAUSE; else bp->flags &= ~B44_FLAG_TX_PAUSE; - if (bp->flags & B44_FLAG_PAUSE_AUTO) { - b44_halt(bp); - b44_init_rings(bp); - b44_init_hw(bp, B44_FULL_RESET); - } else { - __b44_set_flow_ctrl(bp, bp->flags); + if (netif_running(dev)) { + if (bp->flags & B44_FLAG_PAUSE_AUTO) { + b44_halt(bp); + b44_init_rings(bp); + b44_init_hw(bp, B44_FULL_RESET); + } else { + __b44_set_flow_ctrl(bp, bp->flags); + } } spin_unlock_irq(&bp->lock); -- GitLab From d0205d6e0a5a3bfa25225b027bca0f70cbd7fdcf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 18 Apr 2024 16:11:33 +0200 Subject: [PATCH 1974/2290] stackdepot: respect __GFP_NOLOCKDEP allocation flag commit 6fe60465e1d53ea321ee909be26d97529e8f746c upstream. If stack_depot_save_flags() allocates memory it always drops __GFP_NOLOCKDEP flag. So when KASAN tries to track __GFP_NOLOCKDEP allocation we may end up with lockdep splat like bellow: ====================================================== WARNING: possible circular locking dependency detected 6.9.0-rc3+ #49 Not tainted ------------------------------------------------------ kswapd0/149 is trying to acquire lock: ffff88811346a920 (&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 [xfs] but task is already holding lock: ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x5d9/0xad0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x7da/0x1030 lock_acquire+0x15d/0x400 fs_reclaim_acquire+0xb5/0x100 prepare_alloc_pages.constprop.0+0xc5/0x230 __alloc_pages+0x12a/0x3f0 alloc_pages_mpol+0x175/0x340 stack_depot_save_flags+0x4c5/0x510 kasan_save_stack+0x30/0x40 kasan_save_track+0x10/0x30 __kasan_slab_alloc+0x83/0x90 kmem_cache_alloc+0x15e/0x4a0 __alloc_object+0x35/0x370 __create_object+0x22/0x90 __kmalloc_node_track_caller+0x477/0x5b0 krealloc+0x5f/0x110 xfs_iext_insert_raw+0x4b2/0x6e0 [xfs] xfs_iext_insert+0x2e/0x130 [xfs] xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs] xfs_btree_visit_block+0xfb/0x290 [xfs] xfs_btree_visit_blocks+0x215/0x2c0 [xfs] xfs_iread_extents+0x1a2/0x2e0 [xfs] xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs] iomap_iter+0x1d1/0x2d0 iomap_file_buffered_write+0x120/0x1a0 xfs_file_buffered_write+0x128/0x4b0 [xfs] vfs_write+0x675/0x890 ksys_write+0xc3/0x160 do_syscall_64+0x94/0x170 entry_SYSCALL_64_after_hwframe+0x71/0x79 Always preserve __GFP_NOLOCKDEP to fix this. Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Andrey Ryabinin Reported-by: Xiubo Li Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/ Reported-by: Damien Le Moal Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/ Suggested-by: Dave Chinner Tested-by: Xiubo Li Cc: Christoph Hellwig Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/stackdepot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 79e894cf84064..77eb944b7a6bd 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -466,10 +466,10 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic - * contexts and I/O. + * contexts, I/O, nolockdep. */ alloc_flags &= ~GFP_ZONEMASK; - alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); alloc_flags |= __GFP_NOWARN; page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); if (page) -- GitLab From e3f0519da4d77e339314a6477b8bf89836812765 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 23 Apr 2024 13:50:53 +0200 Subject: [PATCH 1975/2290] fbdev: fix incorrect address computation in deferred IO commit 78d9161d2bcd442d93d917339297ffa057dbee8c upstream. With deferred IO enabled, a page fault happens when data is written to the framebuffer device. Then driver determines which page is being updated by calculating the offset of the written virtual address within the virtual memory area, and uses this offset to get the updated page within the internal buffer. This page is later copied to hardware (thus the name "deferred IO"). This offset calculation is only correct if the virtual memory area is mapped to the beginning of the internal buffer. Otherwise this is wrong. For example, if users do: mmap(ptr, 4096, PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0xff000); Then the virtual memory area will mapped at offset 0xff000 within the internal buffer. This offset 0xff000 is not accounted for, and wrong page is updated. Correct the calculation by using vmf->pgoff instead. With this change, the variable "offset" will no longer hold the exact offset value, but it is rounded down to multiples of PAGE_SIZE. But this is still correct, because this variable is only used to calculate the page offset. Reported-by: Harshit Mogalapalli Closes: https://lore.kernel.org/linux-fbdev/271372d6-e665-4e7f-b088-dee5f4ab341a@oracle.com Fixes: 56c134f7f1b5 ("fbdev: Track deferred-I/O pages in pageref struct") Cc: Signed-off-by: Nam Cao Reviewed-by: Thomas Zimmermann Tested-by: Harshit Mogalapalli Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240423115053.4490-1-namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fb_defio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 49883c8012e60..3b376345d4d47 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -200,7 +200,7 @@ err_mutex_unlock: */ static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf) { - unsigned long offset = vmf->address - vmf->vma->vm_start; + unsigned long offset = vmf->pgoff << PAGE_SHIFT; struct page *page = vmf->page; file_update_time(vmf->vma->vm_file); -- GitLab From 4ebf1ff60e53ebbb0985d3ead8a454d6a8fad8cd Mon Sep 17 00:00:00 2001 From: Yick Xie Date: Fri, 19 Apr 2024 01:06:10 +0800 Subject: [PATCH 1976/2290] udp: preserve the connected status if only UDP cmsg commit 680d11f6e5427b6af1321932286722d24a8b16c1 upstream. If "udp_cmsg_send()" returned 0 (i.e. only UDP cmsg), "connected" should not be set to 0. Otherwise it stops the connected socket from using the cached route. Fixes: 2e8de8576343 ("udp: add gso segment cmsg") Signed-off-by: Yick Xie Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240418170610.867084-1-yick.xie@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 5 +++-- net/ipv6/udp.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a78c78186c37..39fae7581d350 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,16 +1141,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); - if (err > 0) + if (err > 0) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); + connected = 0; + } if (unlikely(err < 0)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; - connected = 0; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1775e9b9b85ad..504ea27d08fb0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1493,9 +1493,11 @@ do_udp_sendmsg: ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); - if (err > 0) + if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); + connected = false; + } if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1507,7 +1509,6 @@ do_udp_sendmsg: } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; - connected = false; } if (!opt) { opt = txopt_get(np); -- GitLab From ad371d69a6785f886b7240bad2a7a993b7e34040 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2024 16:30:04 +0200 Subject: [PATCH 1977/2290] mtd: diskonchip: work around ubsan link failure commit 21c9fb611c25d5cd038f6fe485232e7884bb0b3d upstream. I ran into a randconfig build failure with UBSAN using gcc-13.2: arm-linux-gnueabi-ld: error: unplaced orphan section `.bss..Lubsan_data31' from `drivers/mtd/nand/raw/diskonchip.o' I'm not entirely sure what is going on here, but I suspect this has something to do with the check for the end of the doc_locations[] array that contains an (unsigned long)0xffffffff element, which is compared against the signed (int)0xffffffff. If this is the case, we should get a runtime check for undefined behavior, but we instead get an unexpected build-time error. I would have expected this to work fine on 32-bit architectures despite the signed integer overflow, though on 64-bit architectures this likely won't ever work. Changing the contition to instead check for the size of the array makes the code safe everywhere and avoids the ubsan check that leads to the link error. The loop code goes back to before 2.6.12. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240405143015.717429-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/diskonchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 5d2ddb037a9a2..2068025d56396 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -53,7 +53,7 @@ static unsigned long doc_locations[] __initdata = { 0xe8000, 0xea000, 0xec000, 0xee000, #endif #endif - 0xffffffff }; +}; static struct mtd_info *doclist = NULL; @@ -1552,7 +1552,7 @@ static int __init init_nanddoc(void) if (ret < 0) return ret; } else { - for (i = 0; (doc_locations[i] != 0xffffffff); i++) { + for (i = 0; i < ARRAY_SIZE(doc_locations); i++) { doc_probe(doc_locations[i]); } } -- GitLab From 2bd852307fdcefe474ff59730aec3f397f1585ae Mon Sep 17 00:00:00 2001 From: Aswin Unnikrishnan Date: Fri, 19 Apr 2024 21:50:13 +0000 Subject: [PATCH 1978/2290] rust: remove `params` from `module` macro example commit 19843452dca40e28d6d3f4793d998b681d505c7f upstream. Remove argument `params` from the `module` macro example, because the macro does not currently support module parameters since it was not sent with the initial merge. Signed-off-by: Aswin Unnikrishnan Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org Fixes: 1fbde52bde73 ("rust: add `macros` crate") Link: https://lore.kernel.org/r/20240419215015.157258-1-aswinunni01@gmail.com [ Reworded slightly. ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/macros/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 91764bfb1f893..f2efa86a747a3 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -27,18 +27,6 @@ use proc_macro::TokenStream; /// author: b"Rust for Linux Contributors", /// description: b"My very own kernel module!", /// license: b"GPL", -/// params: { -/// my_i32: i32 { -/// default: 42, -/// permissions: 0o000, -/// description: b"Example of i32", -/// }, -/// writeable_i32: i32 { -/// default: 42, -/// permissions: 0o644, -/// description: b"Example of i32", -/// }, -/// }, /// } /// /// struct MyModule; -- GitLab From a8e8c79ed2eb195e04b8119b7d83c4ddcccf9739 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 24 Apr 2024 11:20:35 +0300 Subject: [PATCH 1979/2290] x86/tdx: Preserve shared bit on mprotect() commit a0a8d15a798be4b8f20aca2ba91bf6b688c6a640 upstream. The TDX guest platform takes one bit from the physical address to indicate if the page is shared (accessible by VMM). This bit is not part of the physical_mask and is not preserved during mprotect(). As a result, the 'shared' bit is lost during mprotect() on shared mappings. _COMMON_PAGE_CHG_MASK specifies which PTE bits need to be preserved during modification. AMD includes 'sme_me_mask' in the define to preserve the 'encrypt' bit. To cover both Intel and AMD cases, include 'cc_mask' in _COMMON_PAGE_CHG_MASK instead of 'sme_me_mask'. Reported-and-tested-by: Chris Oo Fixes: 41394e33f3a0 ("x86/tdx: Extend the confidential computing API to support TDX guests") Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Rick Edgecombe Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Tom Lendacky Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240424082035.4092071-1-kirill.shutemov%40linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/coco.h | 5 ++++- arch/x86/include/asm/pgtable_types.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 1f97d00ad8588..100a752c33bb3 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -13,9 +13,10 @@ enum cc_vendor { }; extern enum cc_vendor cc_vendor; -extern u64 cc_mask; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern u64 cc_mask; + static inline void cc_set_mask(u64 mask) { RIP_REL_REF(cc_mask) = mask; @@ -25,6 +26,8 @@ u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); void cc_random_init(void); #else +static const u64 cc_mask = 0; + static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f6116b66f2892..f0b9b37c4609b 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -127,7 +127,7 @@ */ #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_CC | \ _PAGE_UFFD_WP) #define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) @@ -153,6 +153,7 @@ enum page_cache_mode { }; #endif +#define _PAGE_CC (_AT(pteval_t, cc_mask)) #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -- GitLab From 56bce3fcf8471f93bfb00d4d9e08fa3d18c82189 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2024 14:21:07 +0100 Subject: [PATCH 1980/2290] dmaengine: owl: fix register access functions [ Upstream commit 43c633ef93a5d293c96ebcedb40130df13128428 ] When building with 'make W=1', clang notices that the computed register values are never actually written back but instead the wrong variable is set: drivers/dma/owl-dma.c:244:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 244 | u32 regval; | ^ drivers/dma/owl-dma.c:268:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 268 | u32 regval; | ^ Change these to what was most likely intended. Fixes: 47e20577c24d ("dmaengine: Add Actions Semi Owl family S900 DMA driver") Signed-off-by: Arnd Bergmann Reviewed-by: Peter Korsgaard Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240322132116.906475-1-arnd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/owl-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index b6e0ac8314e5c..0819f19c87cc5 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -249,7 +249,7 @@ static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, else regval &= ~val; - writel(val, pchan->base + reg); + writel(regval, pchan->base + reg); } static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data) @@ -273,7 +273,7 @@ static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state) else regval &= ~val; - writel(val, od->base + reg); + writel(regval, od->base + reg); } static void dma_writel(struct owl_dma *od, u32 reg, u32 data) -- GitLab From 33d8e3e5f36468c30b610e810169dc2724b06013 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Fri, 15 Mar 2024 18:14:11 +0530 Subject: [PATCH 1981/2290] dmaengine: tegra186: Fix residual calculation [ Upstream commit 30f0ced9971b2d8c8c24ae75786f9079489a012d ] The existing residual calculation returns an incorrect value when bytes_xfer == bytes_req. This scenario occurs particularly with drivers like UART where DMA is scheduled for maximum number of bytes and is terminated when the bytes inflow stops. At higher baud rates, it could request the tx_status while there is no bytes left to transfer. This will lead to incorrect residual being set. Hence return residual as '0' when bytes transferred equals to the bytes requested. Fixes: ee17028009d4 ("dmaengine: tegra: Add tegra gpcdma driver") Signed-off-by: Akhil R Reviewed-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240315124411.17582-1-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/tegra186-gpc-dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index 75af3488a3baf..e70b7c41dcab7 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -742,6 +742,9 @@ static int tegra_dma_get_residual(struct tegra_dma_channel *tdc) bytes_xfer = dma_desc->bytes_xfer + sg_req[dma_desc->sg_idx].len - (wcount * 4); + if (dma_desc->bytes_req == bytes_xfer) + return 0; + residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req); return residual; -- GitLab From 2203a447fd1e71a9b92590db7fc942a7df92b031 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Mar 2024 14:04:21 +0200 Subject: [PATCH 1982/2290] idma64: Don't try to serve interrupts when device is powered off [ Upstream commit 9140ce47872bfd89fca888c2f992faa51d20c2bc ] When iDMA 64-bit device is powered off, the IRQ status register is all 1:s. This is never happen in real case and signalling that the device is simply powered off. Don't try to serve interrupts that are not ours. Fixes: 667dfed98615 ("dmaengine: add a driver for Intel integrated DMA 64-bit") Reported-by: Heiner Kallweit Closes: https://lore.kernel.org/r/700bbb84-90e1-4505-8ff0-3f17ea8bc631@gmail.com Tested-by: Heiner Kallweit Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240321120453.1360138-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idma64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index f4c07ad3be15b..af8777a1ec2e3 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -167,6 +167,10 @@ static irqreturn_t idma64_irq(int irq, void *dev) u32 status_err; unsigned short i; + /* Since IRQ may be shared, check if DMA controller is powered on */ + if (status == GENMASK(31, 0)) + return IRQ_NONE; + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); /* Check if we have any interrupt from the DMA controller */ -- GitLab From 976df695f579bbb2914114b4e9974fe4ed1eb813 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:30 +0300 Subject: [PATCH 1983/2290] phy: marvell: a3700-comphy: Fix out of bounds read [ Upstream commit e4308bc22b9d46cf33165c9dfaeebcf29cd56f04 ] There is an out of bounds read access of 'gbe_phy_init_fix[fix_idx].addr' every iteration after 'fix_idx' reaches 'ARRAY_SIZE(gbe_phy_init_fix)'. Make sure 'gbe_phy_init[addr]' is used when all elements of 'gbe_phy_init_fix' array are handled. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20240321164734.49273-1-m.kobuk@ispras.ru Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index d641b345afa35..392a8ae1bc667 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -610,11 +610,12 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, * comparison to 3.125 Gbps values. These register values are * stored in "gbe_phy_init_fix" array. */ - if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) { + if (!is_1gbps && + fix_idx < ARRAY_SIZE(gbe_phy_init_fix) && + gbe_phy_init_fix[fix_idx].addr == addr) { /* Use new value */ val = gbe_phy_init_fix[fix_idx].value; - if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix)) - fix_idx++; + fix_idx++; } else { val = gbe_phy_init[addr]; } -- GitLab From d6a6bacd0a118db9a02d1f8e54182a2ec1b13e36 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:31 +0300 Subject: [PATCH 1984/2290] phy: marvell: a3700-comphy: Fix hardcoded array size [ Upstream commit 627207703b73615653eea5ab7a841d5b478d961e ] Replace hardcoded 'gbe_phy_init' array size by explicit one. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Link: https://lore.kernel.org/r/20240321164734.49273-2-m.kobuk@ispras.ru Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 392a8ae1bc667..251e1aedd4a6e 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -602,7 +602,7 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, u16 val; fix_idx = 0; - for (addr = 0; addr < 512; addr++) { + for (addr = 0; addr < ARRAY_SIZE(gbe_phy_init); addr++) { /* * All PHY register values are defined in full for 3.125Gbps * SERDES speed. The values required for 1.25 Gbps are almost -- GitLab From ed4b981b1d8671b2aabbf8cf128ba8829471c6b6 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:01 +0800 Subject: [PATCH 1985/2290] phy: freescale: imx8m-pcie: Refine i.MX8MM PCIe PHY driver [ Upstream commit ca679c49c4463595499a053ba94328acb574fffa ] To make it more flexible and easy to expand. Refine i.MX8MM PCIe PHY driver. - Use gpr compatible string to avoid the codes duplications when add another platform PCIe PHY support. - Re-arrange the codes to let it more flexible and easy to expand. No functional change. Re-arrange the TX tuning, since internal registers can be wrote through APB interface before assertion of CMN_RST. Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/1665625622-20551-4-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul Stable-dep-of: 3a161017f1de ("phy: freescale: imx8m-pcie: fix pcie link-up instability") Signed-off-by: Sasha Levin --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 106 +++++++++++++-------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index c93286483b425..f1476936b8d9a 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,15 @@ #define IMX8MM_GPR_PCIE_SSC_EN BIT(16) #define IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE BIT(9) +enum imx8_pcie_phy_type { + IMX8MM, +}; + +struct imx8_pcie_phy_drvdata { + const char *gpr; + enum imx8_pcie_phy_type variant; +}; + struct imx8_pcie_phy { void __iomem *base; struct clk *clk; @@ -57,6 +67,7 @@ struct imx8_pcie_phy { u32 tx_deemph_gen1; u32 tx_deemph_gen2; bool clkreq_unused; + const struct imx8_pcie_phy_drvdata *drvdata; }; static int imx8_pcie_phy_power_on(struct phy *phy) @@ -68,31 +79,17 @@ static int imx8_pcie_phy_power_on(struct phy *phy) reset_control_assert(imx8_phy->reset); pad_mode = imx8_phy->refclk_pad_mode; - /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, - imx8_phy->clkreq_unused ? - 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN, - IMX8MM_GPR_PCIE_AUX_EN); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_POWER_OFF, 0); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_SSC_EN, 0); - - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_REF_CLK_SEL, - pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? - IMX8MM_GPR_PCIE_REF_CLK_EXT : - IMX8MM_GPR_PCIE_REF_CLK_PLL); - usleep_range(100, 200); - - /* Do the PHY common block reset */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_CMN_RST, - IMX8MM_GPR_PCIE_CMN_RST); - usleep_range(200, 500); + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + /* Tune PHY de-emphasis setting to pass PCIe compliance. */ + if (imx8_phy->tx_deemph_gen1) + writel(imx8_phy->tx_deemph_gen1, + imx8_phy->base + PCIE_PHY_TRSV_REG5); + if (imx8_phy->tx_deemph_gen2) + writel(imx8_phy->tx_deemph_gen2, + imx8_phy->base + PCIE_PHY_TRSV_REG6); + break; + } if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT || pad_mode == IMX8_PCIE_REFCLK_PAD_UNUSED) { @@ -120,15 +117,37 @@ static int imx8_pcie_phy_power_on(struct phy *phy) imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG065); } - /* Tune PHY de-emphasis setting to pass PCIe compliance. */ - if (imx8_phy->tx_deemph_gen1) - writel(imx8_phy->tx_deemph_gen1, - imx8_phy->base + PCIE_PHY_TRSV_REG5); - if (imx8_phy->tx_deemph_gen2) - writel(imx8_phy->tx_deemph_gen2, - imx8_phy->base + PCIE_PHY_TRSV_REG6); + /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, + imx8_phy->clkreq_unused ? + 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN, + IMX8MM_GPR_PCIE_AUX_EN); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_POWER_OFF, 0); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_SSC_EN, 0); - reset_control_deassert(imx8_phy->reset); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_REF_CLK_SEL, + pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? + IMX8MM_GPR_PCIE_REF_CLK_EXT : + IMX8MM_GPR_PCIE_REF_CLK_PLL); + usleep_range(100, 200); + + /* Do the PHY common block reset */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_CMN_RST, + IMX8MM_GPR_PCIE_CMN_RST); + + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + reset_control_deassert(imx8_phy->reset); + usleep_range(200, 500); + break; + } /* Polling to check the phy is ready or not. */ ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG75, @@ -160,6 +179,17 @@ static const struct phy_ops imx8_pcie_phy_ops = { .owner = THIS_MODULE, }; +static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = { + .gpr = "fsl,imx8mm-iomuxc-gpr", + .variant = IMX8MM, +}; + +static const struct of_device_id imx8_pcie_phy_of_match[] = { + {.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, }, + { }, +}; +MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); + static int imx8_pcie_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -172,6 +202,8 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) if (!imx8_phy) return -ENOMEM; + imx8_phy->drvdata = of_device_get_match_data(dev); + /* get PHY refclk pad mode */ of_property_read_u32(np, "fsl,refclk-pad-mode", &imx8_phy->refclk_pad_mode); @@ -197,7 +229,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) /* Grab GPR config register range */ imx8_phy->iomuxc_gpr = - syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + syscon_regmap_lookup_by_compatible(imx8_phy->drvdata->gpr); if (IS_ERR(imx8_phy->iomuxc_gpr)) { dev_err(dev, "unable to find iomuxc registers\n"); return PTR_ERR(imx8_phy->iomuxc_gpr); @@ -225,12 +257,6 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } -static const struct of_device_id imx8_pcie_phy_of_match[] = { - {.compatible = "fsl,imx8mm-pcie-phy",}, - { }, -}; -MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); - static struct platform_driver imx8_pcie_phy_driver = { .probe = imx8_pcie_phy_probe, .driver = { -- GitLab From 4723dfe76de0c91ab70bda148b5c2af664a46c80 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 22 Mar 2024 14:06:32 +0100 Subject: [PATCH 1986/2290] phy: freescale: imx8m-pcie: fix pcie link-up instability [ Upstream commit 3a161017f1de55cc48be81f6156004c151f32677 ] Leaving AUX_PLL_REFCLK_SEL at its reset default of AUX_IN (PLL clock) proves to be more stable on the i.MX 8M Mini. Fixes: 1aa97b002258 ("phy: freescale: pcie: Initialize the imx8 pcie standalone phy driver") Signed-off-by: Marcel Ziswiler Reviewed-by: Richard Zhu Link: https://lore.kernel.org/r/20240322130646.1016630-2-marcel@ziswiler.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index f1476936b8d9a..211ce84d980f9 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -108,8 +108,10 @@ static int imx8_pcie_phy_power_on(struct phy *phy) /* Source clock from SoC internal PLL */ writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062); - writel(AUX_PLL_REFCLK_SEL_SYS_PLL, - imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + if (imx8_phy->drvdata->variant != IMX8MM) { + writel(AUX_PLL_REFCLK_SEL_SYS_PLL, + imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + } val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM; writel(val | ANA_AUX_RX_TERM_GND_EN, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064); -- GitLab From 199895b8b6f7a24dd4c40ef3cc678d6f41a8852a Mon Sep 17 00:00:00 2001 From: Michal Tomek Date: Thu, 4 Apr 2024 19:11:26 +0200 Subject: [PATCH 1987/2290] phy: rockchip-snps-pcie3: fix bifurcation on rk3588 [ Upstream commit f8020dfb311d2b6cf657668792aaa5fa8863a7dd ] So far all RK3588 boards use fully aggregated PCIe. CM3588 is one of the few boards using this feature and apparently it is broken. The PHY offers the following mapping options: port 0 lane 0 - always mapped to controller 0 (4L) port 0 lane 1 - to controller 0 or 2 (1L0) port 1 lane 0 - to controller 0 or 1 (2L) port 1 lane 1 - to controller 0, 1 or 3 (1L1) The data-lanes DT property maps these as follows: 0 = no controller (unsupported by the HW) 1 = 4L 2 = 2L 3 = 1L0 4 = 1L1 That allows the following configurations with first column being the mainline data-lane mapping, second column being the downstream name, third column being PCIE3PHY_GRF_CMN_CON0 and PHP_GRF_PCIESEL register values and final column being the user visible lane setup: <1 1 1 1> = AGGREG = [4 0] = x4 (aggregation) <1 1 2 2> = NANBNB = [0 0] = x2 x2 (no bif.) <1 3 2 2> = NANBBI = [1 1] = x2 x1x1 (bif. of port 0) <1 1 2 4> = NABINB = [2 2] = x1x1 x2 (bif. of port 1) <1 3 2 4> = NABIBI = [3 3] = x1x1 x1x1 (bif. of both ports) The driver currently does not program PHP_GRF_PCIESEL correctly, which is fixed by this patch. As a side-effect the new logic is much simpler than the old logic. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Michal Tomek Signed-off-by: Sebastian Reichel Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-1-9907136eeafd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- .../phy/rockchip/phy-rockchip-snps-pcie3.c | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index 1d355b32ba559..4f32a2dc24580 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -131,7 +131,7 @@ static const struct rockchip_p3phy_ops rk3568_ops = { static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) { u32 reg = 0; - u8 mode = 0; + u8 mode = RK3588_LANE_AGGREGATION; /* default */ int ret; /* Deassert PCIe PMA output clamp mode */ @@ -139,28 +139,20 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) /* Set bifurcation if needed */ for (int i = 0; i < priv->num_lanes; i++) { - if (!priv->lanes[i]) - mode |= (BIT(i) << 3); - if (priv->lanes[i] > 1) - mode |= (BIT(i) >> 1); - } - - if (!mode) - reg = RK3588_LANE_AGGREGATION; - else { - if (mode & (BIT(0) | BIT(1))) - reg |= RK3588_BIFURCATION_LANE_0_1; - - if (mode & (BIT(2) | BIT(3))) - reg |= RK3588_BIFURCATION_LANE_2_3; + mode &= ~RK3588_LANE_AGGREGATION; + if (priv->lanes[i] == 3) + mode |= RK3588_BIFURCATION_LANE_0_1; + if (priv->lanes[i] == 4) + mode |= RK3588_BIFURCATION_LANE_2_3; } + reg = mode; regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = (mode & (BIT(6) | BIT(7))) >> 6; + reg = mode & 3; if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, (reg << 16) | reg); -- GitLab From e71d5ec7c04362cdcffd51027b97f35cace4b616 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 Apr 2024 19:11:27 +0200 Subject: [PATCH 1988/2290] phy: rockchip-snps-pcie3: fix clearing PHP_GRF_PCIESEL_CON bits [ Upstream commit 55491a5fa163bf15158f34f3650b3985f25622b9 ] Currently the PCIe v3 PHY driver only sets the pcie1ln_sel bits, but does not clear them because of an incorrect write mask. This fixes up the issue by using a newly introduced constant for the write mask. While at it also introduces a proper GENMASK based constant for the PCIE30_PHY_MODE. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-2-9907136eeafd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/rockchip/phy-rockchip-snps-pcie3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index 4f32a2dc24580..c6aa6bc69e900 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -39,6 +39,8 @@ #define RK3588_BIFURCATION_LANE_0_1 BIT(0) #define RK3588_BIFURCATION_LANE_2_3 BIT(1) #define RK3588_LANE_AGGREGATION BIT(2) +#define RK3588_PCIE1LN_SEL_EN (GENMASK(1, 0) << 16) +#define RK3588_PCIE30_PHY_MODE_EN (GENMASK(2, 0) << 16) struct rockchip_p3phy_ops; @@ -148,14 +150,15 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) } reg = mode; - regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); + regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, + RK3588_PCIE30_PHY_MODE_EN | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = mode & 3; + reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3); if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, - (reg << 16) | reg); + RK3588_PCIE1LN_SEL_EN | reg); } reset_control_deassert(priv->p30phy); -- GitLab From 8bf574183282d219cfa991f7df37aad491d74c11 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 8 Mar 2024 16:00:32 -0500 Subject: [PATCH 1989/2290] dma: xilinx_dpdma: Fix locking [ Upstream commit 244296cc3a155199a8b080d19e645d7d49081a38 ] There are several places where either chan->lock or chan->vchan.lock was not held. Add appropriate locking. This fixes lockdep warnings like [ 31.077578] ------------[ cut here ]------------ [ 31.077831] WARNING: CPU: 2 PID: 40 at drivers/dma/xilinx/xilinx_dpdma.c:834 xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.077953] Modules linked in: [ 31.078019] CPU: 2 PID: 40 Comm: kworker/u12:1 Not tainted 6.6.20+ #98 [ 31.078102] Hardware name: xlnx,zynqmp (DT) [ 31.078169] Workqueue: events_unbound deferred_probe_work_func [ 31.078272] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 31.078377] pc : xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.078473] lr : xilinx_dpdma_chan_queue_transfer+0x270/0x5e0 [ 31.078550] sp : ffffffc083bb2e10 [ 31.078590] x29: ffffffc083bb2e10 x28: 0000000000000000 x27: ffffff880165a168 [ 31.078754] x26: ffffff880164e920 x25: ffffff880164eab8 x24: ffffff880164d480 [ 31.078920] x23: ffffff880165a148 x22: ffffff880164e988 x21: 0000000000000000 [ 31.079132] x20: ffffffc082aa3000 x19: ffffff880164e880 x18: 0000000000000000 [ 31.079295] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 31.079453] x14: 0000000000000000 x13: ffffff8802263dc0 x12: 0000000000000001 [ 31.079613] x11: 0001ffc083bb2e34 x10: 0001ff880164e98f x9 : 0001ffc082aa3def [ 31.079824] x8 : 0001ffc082aa3dec x7 : 0000000000000000 x6 : 0000000000000516 [ 31.079982] x5 : ffffffc7f8d43000 x4 : ffffff88003c9c40 x3 : ffffffffffffffff [ 31.080147] x2 : ffffffc7f8d43000 x1 : 00000000000000c0 x0 : 0000000000000000 [ 31.080307] Call trace: [ 31.080340] xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.080518] xilinx_dpdma_issue_pending+0x11c/0x120 [ 31.080595] zynqmp_disp_layer_update+0x180/0x3ac [ 31.080712] zynqmp_dpsub_plane_atomic_update+0x11c/0x21c [ 31.080825] drm_atomic_helper_commit_planes+0x20c/0x684 [ 31.080951] drm_atomic_helper_commit_tail+0x5c/0xb0 [ 31.081139] commit_tail+0x234/0x294 [ 31.081246] drm_atomic_helper_commit+0x1f8/0x210 [ 31.081363] drm_atomic_commit+0x100/0x140 [ 31.081477] drm_client_modeset_commit_atomic+0x318/0x384 [ 31.081634] drm_client_modeset_commit_locked+0x8c/0x24c [ 31.081725] drm_client_modeset_commit+0x34/0x5c [ 31.081812] __drm_fb_helper_restore_fbdev_mode_unlocked+0x104/0x168 [ 31.081899] drm_fb_helper_set_par+0x50/0x70 [ 31.081971] fbcon_init+0x538/0xc48 [ 31.082047] visual_init+0x16c/0x23c [ 31.082207] do_bind_con_driver.isra.0+0x2d0/0x634 [ 31.082320] do_take_over_console+0x24c/0x33c [ 31.082429] do_fbcon_takeover+0xbc/0x1b0 [ 31.082503] fbcon_fb_registered+0x2d0/0x34c [ 31.082663] register_framebuffer+0x27c/0x38c [ 31.082767] __drm_fb_helper_initial_config_and_unlock+0x5c0/0x91c [ 31.082939] drm_fb_helper_initial_config+0x50/0x74 [ 31.083012] drm_fbdev_dma_client_hotplug+0xb8/0x108 [ 31.083115] drm_client_register+0xa0/0xf4 [ 31.083195] drm_fbdev_dma_setup+0xb0/0x1cc [ 31.083293] zynqmp_dpsub_drm_init+0x45c/0x4e0 [ 31.083431] zynqmp_dpsub_probe+0x444/0x5e0 [ 31.083616] platform_probe+0x8c/0x13c [ 31.083713] really_probe+0x258/0x59c [ 31.083793] __driver_probe_device+0xc4/0x224 [ 31.083878] driver_probe_device+0x70/0x1c0 [ 31.083961] __device_attach_driver+0x108/0x1e0 [ 31.084052] bus_for_each_drv+0x9c/0x100 [ 31.084125] __device_attach+0x100/0x298 [ 31.084207] device_initial_probe+0x14/0x20 [ 31.084292] bus_probe_device+0xd8/0xdc [ 31.084368] deferred_probe_work_func+0x11c/0x180 [ 31.084451] process_one_work+0x3ac/0x988 [ 31.084643] worker_thread+0x398/0x694 [ 31.084752] kthread+0x1bc/0x1c0 [ 31.084848] ret_from_fork+0x10/0x20 [ 31.084932] irq event stamp: 64549 [ 31.084970] hardirqs last enabled at (64548): [] _raw_spin_unlock_irqrestore+0x80/0x90 [ 31.085157] hardirqs last disabled at (64549): [] _raw_spin_lock_irqsave+0xc0/0xdc [ 31.085277] softirqs last enabled at (64503): [] __do_softirq+0x47c/0x500 [ 31.085390] softirqs last disabled at (64498): [] ____do_softirq+0x10/0x1c [ 31.085501] ---[ end trace 0000000000000000 ]--- Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver") Signed-off-by: Sean Anderson Reviewed-by: Tomi Valkeinen Link: https://lore.kernel.org/r/20240308210034.3634938-2-sean.anderson@linux.dev Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dpdma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 84dc5240a8074..93938ed80fc83 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -214,7 +214,8 @@ struct xilinx_dpdma_tx_desc { * @running: true if the channel is running * @first_frame: flag for the first frame of stream * @video_group: flag if multi-channel operation is needed for video channels - * @lock: lock to access struct xilinx_dpdma_chan + * @lock: lock to access struct xilinx_dpdma_chan. Must be taken before + * @vchan.lock, if both are to be held. * @desc_pool: descriptor allocation pool * @err_task: error IRQ bottom half handler * @desc: References to descriptors being processed @@ -1097,12 +1098,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) * Complete the active descriptor, if any, promote the pending * descriptor to active, and queue the next transfer, if any. */ + spin_lock(&chan->vchan.lock); if (chan->desc.active) vchan_cookie_complete(&chan->desc.active->vdesc); chan->desc.active = pending; chan->desc.pending = NULL; xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); out: spin_unlock_irqrestore(&chan->lock, flags); @@ -1264,10 +1267,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan) struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; - spin_lock_irqsave(&chan->vchan.lock, flags); + spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); if (vchan_issue_pending(&chan->vchan)) xilinx_dpdma_chan_queue_transfer(chan); - spin_unlock_irqrestore(&chan->vchan.lock, flags); + spin_unlock(&chan->vchan.lock); + spin_unlock_irqrestore(&chan->lock, flags); } static int xilinx_dpdma_config(struct dma_chan *dchan, @@ -1495,7 +1500,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t) XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id); spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); spin_unlock_irqrestore(&chan->lock, flags); } -- GitLab From 023b6390a15a98f9c3aa5e7da78d485d5384a08e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 13 Mar 2024 14:40:31 -0700 Subject: [PATCH 1990/2290] dmaengine: idxd: Fix oops during rmmod on single-CPU platforms [ Upstream commit f221033f5c24659dc6ad7e5cf18fb1b075f4a8be ] During the removal of the idxd driver, registered offline callback is invoked as part of the clean up process. However, on systems with only one CPU online, no valid target is available to migrate the perf context, resulting in a kernel oops: BUG: unable to handle page fault for address: 000000000002a2b8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 1470e1067 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 20 Comm: cpuhp/0 Not tainted 6.8.0-rc6-dsa+ #57 Hardware name: Intel Corporation AvenueCity/AvenueCity, BIOS BHSDCRB1.86B.2492.D03.2307181620 07/18/2023 RIP: 0010:mutex_lock+0x2e/0x50 ... Call Trace: __die+0x24/0x70 page_fault_oops+0x82/0x160 do_user_addr_fault+0x65/0x6b0 __pfx___rdmsr_safe_on_cpu+0x10/0x10 exc_page_fault+0x7d/0x170 asm_exc_page_fault+0x26/0x30 mutex_lock+0x2e/0x50 mutex_lock+0x1e/0x50 perf_pmu_migrate_context+0x87/0x1f0 perf_event_cpu_offline+0x76/0x90 [idxd] cpuhp_invoke_callback+0xa2/0x4f0 __pfx_perf_event_cpu_offline+0x10/0x10 [idxd] cpuhp_thread_fun+0x98/0x150 smpboot_thread_fn+0x27/0x260 smpboot_thread_fn+0x1af/0x260 __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x103/0x140 __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fix the issue by preventing the migration of the perf context to an invalid target. Fixes: 81dd4d4d6178 ("dmaengine: idxd: Add IDXD performance monitor support") Reported-by: Terrence Xu Tested-by: Terrence Xu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20240313214031.1658045-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idxd/perfmon.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index d73004f47cf4b..612ef13b71603 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -529,14 +529,11 @@ static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; target = cpumask_any_but(cpu_online_mask, cpu); - /* migrate events if there is a valid target */ - if (target < nr_cpu_ids) + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); - else - target = -1; - - perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + } return 0; } -- GitLab From 0b947c90e3aa8f381683984e4100a93971943c10 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 5 Dec 2023 11:02:55 +0800 Subject: [PATCH 1991/2290] riscv: fix VMALLOC_START definition [ Upstream commit ac88ff6b9d7dea9f0907c86bdae204dde7d5c0e6 ] When below config items are set, compiler complained: -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y ...... ----------------------- ------------------------------------------------------------------- arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:11:58: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'int' [-Wformat=] 11 | vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); | ~~^ | | | long unsigned int | %x ---------------------------------------------------------------------- This is because on riscv macro VMALLOC_START has different type when CONFIG_MMU is set or unset. arch/riscv/include/asm/pgtable.h: -------------------------------------------------- Changing it to _AC(0, UL) in case CONFIG_MMU=n can fix the warning. Link: https://lkml.kernel.org/r/ZW7OsX4zQRA3mO4+@MiWiFi-R3L-srv Signed-off-by: Baoquan He Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Signed-off-by: Andrew Morton Stable-dep-of: 6065e736f82c ("riscv: Fix TASK_SIZE on 64-bit NOMMU") Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 63055c6ad2c25..73fe12c93cad1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -800,7 +800,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ -- GitLab From 4201b8c8f2c32af321fb50867e68ac6c1cbed4be Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:46 -0800 Subject: [PATCH 1992/2290] riscv: Fix TASK_SIZE on 64-bit NOMMU [ Upstream commit 6065e736f82c817c9a597a31ee67f0ce4628e948 ] On NOMMU, userspace memory can come from anywhere in physical RAM. The current definition of TASK_SIZE is wrong if any RAM exists above 4G, causing spurious failures in the userspace access routines. Fixes: 6bd33e1ece52 ("riscv: add nommu support") Fixes: c3f896dcf1e4 ("mm: switch the test_vmalloc module to use __vmalloc_node") Signed-off-by: Samuel Holland Reviewed-by: Jisheng Zhang Reviewed-by: Bo Gan Link: https://lore.kernel.org/r/20240227003630.3634533-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 73fe12c93cad1..2d9416a6a070e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -799,7 +799,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL -#define TASK_SIZE 0xffffffffUL +#define TASK_SIZE _AC(-1, UL) #define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE -- GitLab From 25b3498485ac281e5851700e33b97f12c9533fd8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 6 Apr 2024 16:08:21 +0200 Subject: [PATCH 1993/2290] phy: ti: tusb1210: Resolve charger-det crash if charger psy is unregistered [ Upstream commit bf6e4ee5c43690e4c5a8a057bbcd4ff986bed052 ] The power_supply frame-work is not really designed for there to be long living in kernel references to power_supply devices. Specifically unregistering a power_supply while some other code has a reference to it triggers a WARN in power_supply_unregister(): WARN_ON(atomic_dec_return(&psy->use_cnt)); Folllowed by the power_supply still getting removed and the backing data freed anyway, leaving the tusb1210 charger-detect code with a dangling reference, resulting in a crash the next time tusb1210_get_online() is called. Fix this by only holding the reference in tusb1210_get_online() freeing it at the end of the function. Note this still leaves a theoretical race window, but it avoids the issue when manually rmmod-ing the charger chip driver during development. Fixes: 48969a5623ed ("phy: ti: tusb1210: Add charger detection") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240406140821.18624-1-hdegoede@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/ti/phy-tusb1210.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index 669c13d6e402f..bdd44ec3e8098 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -64,7 +64,6 @@ struct tusb1210 { struct delayed_work chg_det_work; struct notifier_block psy_nb; struct power_supply *psy; - struct power_supply *charger; #endif }; @@ -230,19 +229,24 @@ static const char * const tusb1210_chargers[] = { static bool tusb1210_get_online(struct tusb1210 *tusb) { + struct power_supply *charger = NULL; union power_supply_propval val; - int i; + bool online = false; + int i, ret; - for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !tusb->charger; i++) - tusb->charger = power_supply_get_by_name(tusb1210_chargers[i]); + for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !charger; i++) + charger = power_supply_get_by_name(tusb1210_chargers[i]); - if (!tusb->charger) + if (!charger) return false; - if (power_supply_get_property(tusb->charger, POWER_SUPPLY_PROP_ONLINE, &val)) - return false; + ret = power_supply_get_property(charger, POWER_SUPPLY_PROP_ONLINE, &val); + if (ret == 0) + online = val.intval; + + power_supply_put(charger); - return val.intval; + return online; } static void tusb1210_chg_det_work(struct work_struct *work) @@ -466,9 +470,6 @@ static void tusb1210_remove_charger_detect(struct tusb1210 *tusb) cancel_delayed_work_sync(&tusb->chg_det_work); power_supply_unregister(tusb->psy); } - - if (tusb->charger) - power_supply_put(tusb->charger); } #else static void tusb1210_probe_charger_detect(struct tusb1210 *tusb) { } -- GitLab From 4e75e222d397c6752b229ed72fc4644c8c36ecde Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 26 Apr 2024 08:44:08 +0200 Subject: [PATCH 1994/2290] i2c: smbus: fix NULL function pointer dereference [ Upstream commit 91811a31b68d3765b3065f4bb6d7d6d84a7cfc9f ] Baruch reported an OOPS when using the designware controller as target only. Target-only modes break the assumption of one transfer function always being available. Fix this by always checking the pointer in __i2c_transfer. Reported-by: Baruch Siach Closes: https://lore.kernel.org/r/4269631780e5ba789cf1ae391eec1b959def7d99.1712761976.git.baruch@tkos.co.il Fixes: 4b1acc43331d ("i2c: core changes for slave support") [wsa: dropped the simplification in core-smbus to avoid theoretical regressions] Signed-off-by: Wolfram Sang Tested-by: Baruch Siach Signed-off-by: Sasha Levin --- drivers/i2c/i2c-core-base.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5e3976ba52650..1ebc953799149 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2075,13 +2075,18 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, * Returns negative errno, else the number of messages executed. * * Adapter lock must be held when calling this function. No debug logging - * takes place. adap->algo->master_xfer existence isn't checked. + * takes place. */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { unsigned long orig_jiffies; int ret, try; + if (!adap->algo->master_xfer) { + dev_dbg(&adap->dev, "I2C level transfers not supported\n"); + return -EOPNOTSUPP; + } + if (WARN_ON(!msgs || num < 1)) return -EINVAL; @@ -2148,11 +2153,6 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { int ret; - if (!adap->algo->master_xfer) { - dev_dbg(&adap->dev, "I2C level transfers not supported\n"); - return -EOPNOTSUPP; - } - /* REVISIT the fault reporting model here is weak: * * - When we get an error after receiving N bytes from a slave, -- GitLab From 9b7c5004d7c5ae062134052a85290869a015814c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 29 Apr 2024 15:47:51 +0100 Subject: [PATCH 1995/2290] bounds: Use the right number of bits for power-of-two CONFIG_NR_CPUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5af385f5f4cddf908f663974847a4083b2ff2c79 upstream. bits_per() rounds up to the next power of two when passed a power of two. This causes crashes on some machines and configurations. Reported-by: Михаил Новоселов Tested-by: Ильфат Гаптрахманов Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3347 Link: https://lore.kernel.org/all/1c978cf1-2934-4e66-e4b3-e81b04cb3571@rosalinux.ru/ Fixes: f2d5dcb48f7b (bounds: support non-power-of-two CONFIG_NR_CPUS) Cc: Signed-off-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Mel Gorman Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index c5a9fcd2d6228..29b2cd00df2cc 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, order_base_2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN -- GitLab From 6536f12fe2ddaa134b9ffa47b115256bd1302a6e Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 29 Apr 2024 17:44:21 -0700 Subject: [PATCH 1996/2290] macsec: Enable devices to advertise whether they update sk_buff md_dst during offloads commit 475747a19316b08e856c666a20503e73d7ed67ed upstream. Omit rx_use_md_dst comment in upstream commit since macsec_ops is not documented. Cannot know whether a Rx skb missing md_dst is intended for MACsec or not without knowing whether the device is able to update this field during an offload. Assume that an offload to a MACsec device cannot support updating md_dst by default. Capable devices can advertise that they do indicate that an skb is related to a MACsec offloaded packet using the md_dst. Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/macsec.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/macsec.h b/include/net/macsec.h index 65c93959c2dc5..dd578d193f9aa 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -302,6 +302,7 @@ struct macsec_ops { int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx); + bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); -- GitLab From 21e042d29e8990e30a10b2e087fc06cdc9597a35 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 29 Apr 2024 17:44:23 -0700 Subject: [PATCH 1997/2290] macsec: Detect if Rx skb is macsec-related for offloading devices that update md_dst commit 642c984dd0e37dbaec9f87bd1211e5fac1f142bf upstream. Can now correctly identify where the packets should be delivered by using md_dst or its absence on devices that provide it. This detection is not possible without device drivers that update md_dst. A fallback pattern should be used for supporting such device drivers. This fallback mode causes multicast messages to be cloned to both the non-macsec and macsec ports, independent of whether the multicast message received was encrypted over MACsec or not. Other non-macsec traffic may also fail to be handled correctly for devices in promiscuous mode. Link: https://lore.kernel.org/netdev/ZULRxX9eIbFiVi7v@hog/ Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-4-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 209ee9f352754..8a8fd74110e2c 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1007,10 +1007,12 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; + bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); + is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; @@ -1021,10 +1023,42 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { - if (md_dst && md_dst->type == METADATA_MACSEC && - (!find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci))) + const struct macsec_ops *ops; + + ops = macsec_get_ops(macsec, NULL); + + if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; + if (is_macsec_md_dst) { + struct macsec_rx_sc *rx_sc; + + /* All drivers that implement MACsec offload + * support using skb metadata destinations must + * indicate that they do so. + */ + DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); + rx_sc = find_rx_sc(&macsec->secy, + md_dst->u.macsec_info.sci); + if (!rx_sc) + continue; + /* device indicated macsec offload occurred */ + skb->dev = ndev; + skb->pkt_type = PACKET_HOST; + eth_skb_pkt_type(skb, ndev); + ret = RX_HANDLER_ANOTHER; + goto out; + } + + /* This datapath is insecure because it is unable to + * enforce isolation of broadcast/multicast traffic and + * unicast traffic with promiscuous mode on the macsec + * netdev. Since the core stack has no mechanism to + * check that the hardware did indeed receive MACsec + * traffic, it is possible that the response handling + * done by the MACsec port was to a plaintext packet. + * This violates the MACsec protocol standard. + */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ @@ -1040,11 +1074,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) break; nskb->dev = ndev; - if (ether_addr_equal_64bits(hdr->h_dest, - ndev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; - else - nskb->pkt_type = PACKET_MULTICAST; + eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); } -- GitLab From ca817d44befcfdff4aab0bbc5a9c8a0c1eea6983 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 29 Apr 2024 17:44:24 -0700 Subject: [PATCH 1998/2290] net/mlx5e: Advertise mlx5 ethernet driver updates sk_buff md_dst for MACsec commit 39d26a8f2efcb8b5665fe7d54a7dba306a8f1dff upstream. mlx5 Rx flow steering and CQE handling enable the driver to be able to update an skb's md_dst attribute as MACsec when MACsec traffic arrives when a device is configured for offloading. Advertise this to the core stack to take advantage of this capability. Cc: stable@vger.kernel.org Fixes: b7c9400cbc48 ("net/mlx5e: Implement MACsec Rx data path using MACsec skb_metadata_dst") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-5-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index a7832a0180ee6..48cf691842b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1703,6 +1703,7 @@ static const struct macsec_ops macsec_offload_ops = { .mdo_add_secy = mlx5e_macsec_add_secy, .mdo_upd_secy = mlx5e_macsec_upd_secy, .mdo_del_secy = mlx5e_macsec_del_secy, + .rx_uses_md_dst = true, }; bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) -- GitLab From 909ba1f1b4146de529469910c1bd0b1248964536 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 16:29:32 +0200 Subject: [PATCH 1999/2290] Linux 6.1.90 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240430103047.561802595@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Miguel Ojeda Tested-by: Jon Hunter Tested-by: Mateusz Jończyk Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Pascal Ernster Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a0472e1cf7156..7ae5cf9ec9e55 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 89 +SUBLEVEL = 90 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 24369172a03a144e642bd6b73993d7ee5e34073f Mon Sep 17 00:00:00 2001 From: Bumyong Lee Date: Tue, 19 Dec 2023 14:50:26 +0900 Subject: [PATCH 2000/2290] dmaengine: pl330: issue_pending waits until WFP state [ Upstream commit 22a9d9585812440211b0b34a6bc02ade62314be4 ] According to DMA-330 errata notice[1] 71930, DMAKILL cannot clear internal signal, named pipeline_req_active. it makes that pl330 would wait forever in WFP state although dma already send dma request if pl330 gets dma request before entering WFP state. The errata suggests that polling until entering WFP state as workaround and then peripherals allows to issue dma request. [1]: https://developer.arm.com/documentation/genc008428/latest Signed-off-by: Bumyong Lee Link: https://lore.kernel.org/r/20231219055026.118695-1-bumyong.lee@samsung.com Signed-off-by: Vinod Koul Stable-dep-of: afc89870ea67 ("dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state"") Signed-off-by: Sasha Levin --- drivers/dma/pl330.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 3cf0b38387ae5..c29744bfdf2c2 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1053,6 +1053,9 @@ static bool _trigger(struct pl330_thread *thrd) thrd->req_running = idx; + if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM) + UNTIL(thrd, PL330_STATE_WFP); + return true; } -- GitLab From c376f7ab283bf232cbb141b16104c684d8cc86ff Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 28 Mar 2024 12:21:51 +0530 Subject: [PATCH 2001/2290] dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state" [ Upstream commit afc89870ea677bd5a44516eb981f7a259b74280c ] This reverts commit 22a9d9585812 ("dmaengine: pl330: issue_pending waits until WFP state") as it seems to cause regression in pl330 driver. Note the issue now exists in mainline so a fix to be done. Cc: stable@vger.kernel.org Reported-by: karthikeyan Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/pl330.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index c29744bfdf2c2..3cf0b38387ae5 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1053,9 +1053,6 @@ static bool _trigger(struct pl330_thread *thrd) thrd->req_running = idx; - if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM) - UNTIL(thrd, PL330_STATE_WFP); - return true; } -- GitLab From b0db4caa10f2e4e811cf88744fbf0d074b67ec1f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:23 +0200 Subject: [PATCH 2002/2290] wifi: nl80211: don't free NULL coalescing rule [ Upstream commit 801ea33ae82d6a9d954074fbcf8ea9d18f1543a7 ] If the parsing fails, we can dereference a NULL pointer here. Cc: stable@vger.kernel.org Fixes: be29b99a9b51 ("cfg80211/nl80211: Add packet coalesce support") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.b328f80406e7.Id75d961050deb05b3e4e354e024866f350c68103@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1a3bd554e2586..a00df7b89ca86 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13802,6 +13802,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) error: for (i = 0; i < new_coalesce.n_rules; i++) { tmp_rule = &new_coalesce.rules[i]; + if (!tmp_rule) + continue; for (j = 0; j < tmp_rule->n_patterns; j++) kfree(tmp_rule->patterns[j].mask); kfree(tmp_rule->patterns); -- GitLab From e26c2fadef6a1e678cf4161100aaff439fe6f71e Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Thu, 28 Mar 2024 16:54:54 -0300 Subject: [PATCH 2003/2290] rust: kernel: require `Send` for `Module` implementations [ Upstream commit 323617f649c0966ad5e741e47e27e06d3a680d8f ] The thread that calls the module initialisation code when a module is loaded is not guaranteed [in fact, it is unlikely] to be the same one that calls the module cleanup code on module unload, therefore, `Module` implementations must be `Send` to account for them moving from one thread to another implicitly. Signed-off-by: Wedson Almeida Filho Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org # 6.8.x: df70d04d5697: rust: phy: implement `Send` for `Registration` Cc: stable@vger.kernel.org Fixes: 247b365dc8dc ("rust: add `kernel` crate") Link: https://lore.kernel.org/r/20240328195457.225001-3-wedsonaf@gmail.com Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- rust/kernel/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index abd46261d3855..43cf5f6bde9c2 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -37,7 +37,7 @@ const __LOG_PREFIX: &[u8] = b"rust_kernel\0"; /// The top level entrypoint to implementing a kernel module. /// /// For any teardown or cleanup operations, your type may implement [`Drop`]. -pub trait Module: Sized + Sync { +pub trait Module: Sized + Sync + Send { /// Called at module initialization time. /// /// Use this method to perform whatever setup or registration your module -- GitLab From 7b05bb82ac1c952da595fcabdd8773a34d514c2b Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 16 May 2023 10:05:53 +0200 Subject: [PATCH 2004/2290] eeprom: at24: Use dev_err_probe for nvmem register failure [ Upstream commit a3c10035d12f5ec10915d5c00c2e8f7d7c066182 ] When using nvmem layouts it is possible devm_nvmem_register returns -EPROBE_DEFER, resulting in an 'empty' in /sys/kernel/debug/devices_deferred. Use dev_err_probe for providing additional information. Signed-off-by: Alexander Stein Signed-off-by: Bartosz Golaszewski Stable-dep-of: f42c97027fb7 ("eeprom: at24: fix memory corruption race condition") Signed-off-by: Sasha Levin --- drivers/misc/eeprom/at24.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 938c4f41b98c7..5aae2f9bdd51c 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -761,7 +761,8 @@ static int at24_probe(struct i2c_client *client) pm_runtime_disable(dev); if (!pm_runtime_status_suspended(dev)) regulator_disable(at24->vcc_reg); - return PTR_ERR(at24->nvmem); + return dev_err_probe(dev, PTR_ERR(at24->nvmem), + "failed to register nvmem\n"); } /* -- GitLab From 8a9ae7e741585a970467210761efe0396b7bfd20 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 20 Dec 2023 13:55:58 +0100 Subject: [PATCH 2005/2290] eeprom: at24: Probe for DDR3 thermal sensor in the SPD case [ Upstream commit caba40ec3531b0849f44502a03117796e8c9f4a1 ] The DDR3 SPD data structure advertises the presence of a thermal sensor on a DDR3 module in byte 32, bit 7. Let's use this information to explicitly instantiate the thermal sensor I2C client instead of having to rely on class-based I2C probing. The temp sensor i2c address can be derived from the SPD i2c address, so we can directly instantiate the device and don't have to probe for it. If the temp sensor has been instantiated already by other means (e.g. class-based auto-detection), then the busy-check in i2c_new_client_device will detect this. Note: Thermal sensors on DDR4 DIMM's are instantiated from the ee1004 driver. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/68113672-3724-44d5-9ff8-313dd6628f8c@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: f42c97027fb7 ("eeprom: at24: fix memory corruption race condition") Signed-off-by: Sasha Levin --- drivers/misc/eeprom/at24.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 5aae2f9bdd51c..dc30fe137b40f 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -581,6 +581,31 @@ static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len) } } +static void at24_probe_temp_sensor(struct i2c_client *client) +{ + struct at24_data *at24 = i2c_get_clientdata(client); + struct i2c_board_info info = { .type = "jc42" }; + int ret; + u8 val; + + /* + * Byte 2 has value 11 for DDR3, earlier versions don't + * support the thermal sensor present flag + */ + ret = at24_read(at24, 2, &val, 1); + if (ret || val != 11) + return; + + /* Byte 32, bit 7 is set if temp sensor is present */ + ret = at24_read(at24, 32, &val, 1); + if (ret || !(val & BIT(7))) + return; + + info.addr = 0x18 | (client->addr & 7); + + i2c_new_client_device(client->adapter, &info); +} + static int at24_probe(struct i2c_client *client) { struct regmap_config regmap_config = { }; @@ -780,6 +805,10 @@ static int at24_probe(struct i2c_client *client) } } + /* If this a SPD EEPROM, probe for DDR3 thermal sensor */ + if (cdata == &at24_data_spd) + at24_probe_temp_sensor(client); + pm_runtime_idle(dev); if (writable) -- GitLab From c43e5028f5a35331eb25017f5ff6cc21735005c6 Mon Sep 17 00:00:00 2001 From: Daniel Okazaki Date: Mon, 22 Apr 2024 17:43:36 +0000 Subject: [PATCH 2006/2290] eeprom: at24: fix memory corruption race condition [ Upstream commit f42c97027fb75776e2e9358d16bf4a99aeb04cf2 ] If the eeprom is not accessible, an nvmem device will be registered, the read will fail, and the device will be torn down. If another driver accesses the nvmem device after the teardown, it will reference invalid memory. Move the failure point before registering the nvmem device. Signed-off-by: Daniel Okazaki Fixes: b20eb4c1f026 ("eeprom: at24: drop unnecessary label") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240422174337.2487142-1-dtokazaki@google.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/misc/eeprom/at24.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index dc30fe137b40f..e664c1c852503 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -781,15 +781,6 @@ static int at24_probe(struct i2c_client *client) } pm_runtime_enable(dev); - at24->nvmem = devm_nvmem_register(dev, &nvmem_config); - if (IS_ERR(at24->nvmem)) { - pm_runtime_disable(dev); - if (!pm_runtime_status_suspended(dev)) - regulator_disable(at24->vcc_reg); - return dev_err_probe(dev, PTR_ERR(at24->nvmem), - "failed to register nvmem\n"); - } - /* * Perform a one-byte test read to verify that the chip is functional, * unless powering on the device is to be avoided during probe (i.e. @@ -805,6 +796,15 @@ static int at24_probe(struct i2c_client *client) } } + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + pm_runtime_disable(dev); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); + return dev_err_probe(dev, PTR_ERR(at24->nvmem), + "failed to register nvmem\n"); + } + /* If this a SPD EEPROM, probe for DDR3 thermal sensor */ if (cdata == &at24_data_spd) at24_probe_temp_sensor(client); -- GitLab From ad643241d455fdd2516d46cfa54bd0c5e504fc86 Mon Sep 17 00:00:00 2001 From: Tim Jiang Date: Tue, 12 Sep 2023 17:39:57 +0800 Subject: [PATCH 2007/2290] Bluetooth: qca: add support for QCA2066 [ Upstream commit a7f8dedb4be2cc930a29af24427b885405ecd15d ] This patch adds support for QCA2066 firmware patch and NVM downloading. as the RF performance of QCA2066 SOC chip from different foundries may vary. Therefore we use different NVM to configure them based on board ID. Changes in v2 - optimize the function qca_generate_hsp_nvm_name - remove redundant debug code for function qca_read_fw_board_id Signed-off-by: Tim Jiang Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 32868e126c78 ("Bluetooth: qca: fix invalid device address check") Signed-off-by: Sasha Levin --- drivers/bluetooth/btqca.c | 68 +++++++++++++++++++++++++++++++++++++ drivers/bluetooth/btqca.h | 5 ++- drivers/bluetooth/hci_qca.c | 11 ++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 5277090c6d6d7..19cfc342fc7bb 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -205,6 +205,44 @@ static int qca_send_reset(struct hci_dev *hdev) return 0; } +static int qca_read_fw_board_id(struct hci_dev *hdev, u16 *bid) +{ + u8 cmd; + struct sk_buff *skb; + struct edl_event_hdr *edl; + int err = 0; + + cmd = EDL_GET_BID_REQ_CMD; + skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, EDL_PATCH_CMD_LEN, + &cmd, 0, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "Reading QCA board ID failed (%d)", err); + return err; + } + + edl = skb_pull_data(skb, sizeof(*edl)); + if (!edl) { + bt_dev_err(hdev, "QCA read board ID with no header"); + err = -EILSEQ; + goto out; + } + + if (edl->cresp != EDL_CMD_REQ_RES_EVT || + edl->rtype != EDL_GET_BID_REQ_CMD) { + bt_dev_err(hdev, "QCA Wrong packet: %d %d", edl->cresp, edl->rtype); + err = -EIO; + goto out; + } + + *bid = (edl->data[1] << 8) + edl->data[2]; + bt_dev_dbg(hdev, "%s: bid = %x", __func__, *bid); + +out: + kfree_skb(skb); + return err; +} + int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { struct sk_buff *skb; @@ -574,6 +612,23 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome); +static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size, + struct qca_btsoc_version ver, u8 rom_ver, u16 bid) +{ + const char *variant; + + /* hsp gf chip */ + if ((le32_to_cpu(ver.soc_id) & QCA_HSP_GF_SOC_MASK) == QCA_HSP_GF_SOC_ID) + variant = "g"; + else + variant = ""; + + if (bid == 0x0) + snprintf(fwname, max_size, "qca/hpnv%02x%s.bin", rom_ver, variant); + else + snprintf(fwname, max_size, "qca/hpnv%02x%s.%x", rom_ver, variant, bid); +} + int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, enum qca_btsoc_type soc_type, struct qca_btsoc_version ver, const char *firmware_name) @@ -582,6 +637,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, int err; u8 rom_ver = 0; u32 soc_ver; + u16 boardid = 0; bt_dev_dbg(hdev, "QCA setup on UART"); @@ -615,6 +671,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/apbtfw%02x.tlv", rom_ver); break; + case QCA_QCA2066: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpbtfw%02x.tlv", rom_ver); + break; case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), "qca/htbtfw%02x.tlv", rom_ver); @@ -649,6 +709,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Give the controller some time to get ready to receive the NVM */ msleep(10); + if (soc_type == QCA_QCA2066) + qca_read_fw_board_id(hdev, &boardid); + /* Download NVM configuration */ config.type = TLV_TYPE_NVM; if (firmware_name) { @@ -671,6 +734,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/apnv%02x.bin", rom_ver); break; + case QCA_QCA2066: + qca_generate_hsp_nvm_name(config.fwname, + sizeof(config.fwname), ver, rom_ver, boardid); + break; case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), "qca/htnv%02x.bin", rom_ver); @@ -702,6 +769,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, switch (soc_type) { case QCA_WCN3991: + case QCA_QCA2066: case QCA_QCA6390: case QCA_WCN6750: case QCA_WCN6855: diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 03bff5c0059de..dc31984f71dc1 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -12,6 +12,7 @@ #define EDL_PATCH_VER_REQ_CMD (0x19) #define EDL_PATCH_TLV_REQ_CMD (0x1E) #define EDL_GET_BUILD_INFO_CMD (0x20) +#define EDL_GET_BID_REQ_CMD (0x23) #define EDL_NVM_ACCESS_SET_REQ_CMD (0x01) #define EDL_PATCH_CONFIG_CMD (0x28) #define MAX_SIZE_PER_TLV_SEGMENT (243) @@ -47,7 +48,8 @@ ((le32_to_cpu(soc_id) << 16) | (le16_to_cpu(rom_ver))) #define QCA_FW_BUILD_VER_LEN 255 - +#define QCA_HSP_GF_SOC_ID 0x1200 +#define QCA_HSP_GF_SOC_MASK 0x0000ff00 enum qca_baudrate { QCA_BAUDRATE_115200 = 0, @@ -146,6 +148,7 @@ enum qca_btsoc_type { QCA_WCN3990, QCA_WCN3998, QCA_WCN3991, + QCA_QCA2066, QCA_QCA6390, QCA_WCN6750, QCA_WCN6855, diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 179278b801eb3..a0e2b5d992695 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1808,6 +1808,10 @@ static int qca_setup(struct hci_uart *hu) set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); switch (soc_type) { + case QCA_QCA2066: + soc_name = "qca2066"; + break; + case QCA_WCN3988: case QCA_WCN3990: case QCA_WCN3991: @@ -2000,6 +2004,11 @@ static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .num_vregs = 4, }; +static const struct qca_device_data qca_soc_data_qca2066 __maybe_unused = { + .soc_type = QCA_QCA2066, + .num_vregs = 0, +}; + static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { .soc_type = QCA_QCA6390, .num_vregs = 0, @@ -2539,6 +2548,7 @@ static SIMPLE_DEV_PM_OPS(qca_pm_ops, qca_suspend, qca_resume); #ifdef CONFIG_OF static const struct of_device_id qca_bluetooth_of_match[] = { + { .compatible = "qcom,qca2066-bt", .data = &qca_soc_data_qca2066}, { .compatible = "qcom,qca6174-bt" }, { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, { .compatible = "qcom,qca9377-bt" }, @@ -2556,6 +2566,7 @@ MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id qca_bluetooth_acpi_match[] = { + { "QCOM2066", (kernel_ulong_t)&qca_soc_data_qca2066 }, { "QCOM6390", (kernel_ulong_t)&qca_soc_data_qca6390 }, { "DLA16390", (kernel_ulong_t)&qca_soc_data_qca6390 }, { "DLB16390", (kernel_ulong_t)&qca_soc_data_qca6390 }, -- GitLab From 3283a9894d58522256088c00348a89bfdc2458b6 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:03 -0500 Subject: [PATCH 2008/2290] mm/hugetlb: add folio support to hugetlb specific flag macros [ Upstream commit d03c376d9066532551dc56837c7c5490e4fcbbfe ] Patch series "begin converting hugetlb code to folios", v4. This patch series starts the conversion of the hugetlb code to operate on struct folios rather than struct pages. This removes the ambiguitiy of whether functions are operating on head pages, tail pages of compound pages, or base pages. This series passes the linux test project hugetlb test cases. Patch 1 adds hugeltb specific page macros that can operate on folios. Patch 2 adds the private field of the first tail page to struct page. For 32-bit, _private_1 alinging with page[1].private was confirmed by using pahole. Patch 3 introduces hugetlb subpool helper functions which operate on struct folios. These patches were tested using the hugepage-mmap.c selftest along with the migratepages command. Patch 4 converts hugetlb_delete_from_page_cache() to use folios. Patch 5 adds a folio_hstate() function to get hstate information from a folio and adds a user of folio_hstate(). Bpftrace was used to track time spent in the free_huge_pages function during the ltp test cases as it is a caller of the hugetlb subpool functions. From the histogram, the performance is similar before and after the patch series. Time spent in 'free_huge_page' 6.0.0-rc2.master.20220823 @nsecs: [256, 512) 14770 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |@@@@@@@@@@@@@@@@@@@@@@@@@ | [512, 1K) 155 | | [1K, 2K) 169 | | [2K, 4K) 50 | | [4K, 8K) 14 | | [8K, 16K) 3 | | [16K, 32K) 3 | | 6.0.0-rc2.master.20220823 + patch series @nsecs: [256, 512) 13678 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |@@@@@@@@@@@@@@@@@@@@@@@@@ | [512, 1K) 142 | | [1K, 2K) 199 | | [2K, 4K) 44 | | [4K, 8K) 13 | | [8K, 16K) 4 | | [16K, 32K) 1 | | This patch (of 5): Allow the macros which test, set, and clear hugetlb specific page flags to take a hugetlb folio as an input. The macrros are generated as folio_{test, set, clear}_hugetlb_{restore_reserve, migratable, temporary, freed, vmemmap_optimized, raw_hwp_unreliable}. Link: https://lkml.kernel.org/r/20220922154207.1575343-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20220922154207.1575343-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . Biederman" Cc: Hugh Dickins Cc: kernel test robot Cc: Matthew Wilcox Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- include/linux/hugetlb.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1c6f35ba1604f..0c5326fd3c47a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -625,26 +625,50 @@ enum hugetlb_page_flags { */ #ifdef CONFIG_HUGETLB_PAGE #define TESTHPAGEFLAG(uname, flname) \ +static __always_inline \ +bool folio_test_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + return test_bit(HPG_##flname, private); \ + } \ static inline int HPage##uname(struct page *page) \ { return test_bit(HPG_##flname, &(page->private)); } #define SETHPAGEFLAG(uname, flname) \ +static __always_inline \ +void folio_set_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + set_bit(HPG_##flname, private); \ + } \ static inline void SetHPage##uname(struct page *page) \ { set_bit(HPG_##flname, &(page->private)); } #define CLEARHPAGEFLAG(uname, flname) \ +static __always_inline \ +void folio_clear_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + clear_bit(HPG_##flname, private); \ + } \ static inline void ClearHPage##uname(struct page *page) \ { clear_bit(HPG_##flname, &(page->private)); } #else #define TESTHPAGEFLAG(uname, flname) \ +static inline bool \ +folio_test_hugetlb_##flname(struct folio *folio) \ + { return 0; } \ static inline int HPage##uname(struct page *page) \ { return 0; } #define SETHPAGEFLAG(uname, flname) \ +static inline void \ +folio_set_hugetlb_##flname(struct folio *folio) \ + { } \ static inline void SetHPage##uname(struct page *page) \ { } #define CLEARHPAGEFLAG(uname, flname) \ +static inline void \ +folio_clear_hugetlb_##flname(struct folio *folio) \ + { } \ static inline void ClearHPage##uname(struct page *page) \ { } #endif -- GitLab From 271227f13f616445d7e45c8d76e27f0b09425579 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:04 -0500 Subject: [PATCH 2009/2290] mm: add private field of first tail to struct page and struct folio [ Upstream commit d340625f4849ab5dbfebbc7d84709fbfcd39e52f ] Allow struct folio to store hugetlb metadata that is contained in the private field of the first tail page. On 32-bit, _private_1 aligns with page[1].private. Link: https://lkml.kernel.org/r/20220922154207.1575343-3-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Acked-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . Biederman" Cc: Hugh Dickins Cc: kernel test robot Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- include/linux/mm_types.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 247aedb18d5c3..a9c1d611029d1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -144,6 +144,7 @@ struct page { atomic_t compound_pincount; #ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ + unsigned long _private_1; #endif }; struct { /* Second tail page of compound page */ @@ -264,6 +265,7 @@ struct page { * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). + * @_private_1: Do not use directly, call folio_get_private_1(). * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -311,6 +313,7 @@ struct folio { #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif + unsigned long _private_1; }; #define FOLIO_MATCH(pg, fl) \ @@ -338,6 +341,7 @@ FOLIO_MATCH(compound_mapcount, _total_mapcount); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); +FOLIO_MATCH(_private_1, _private_1); #endif #undef FOLIO_MATCH @@ -383,6 +387,16 @@ static inline void *folio_get_private(struct folio *folio) return folio->private; } +static inline void folio_set_private_1(struct folio *folio, unsigned long private) +{ + folio->_private_1 = private; +} + +static inline unsigned long folio_get_private_1(struct folio *folio) +{ + return folio->_private_1; +} + struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) -- GitLab From 6a8af731a16e134453614d71cc9be720608ef46e Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:05 -0500 Subject: [PATCH 2010/2290] mm/hugetlb: add hugetlb_folio_subpool() helpers [ Upstream commit 149562f7509404c382c32c3fa8a6ba356135e5cf ] Allow hugetlbfs_migrate_folio to check and read subpool information by passing in a folio. Link: https://lkml.kernel.org/r/20220922154207.1575343-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . Biederman" Cc: Hugh Dickins Cc: kernel test robot Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- fs/hugetlbfs/inode.c | 8 ++++---- include/linux/hugetlb.h | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 4fe4b3393e71c..330729445d8ab 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1108,10 +1108,10 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, if (rc != MIGRATEPAGE_SUCCESS) return rc; - if (hugetlb_page_subpool(&src->page)) { - hugetlb_set_page_subpool(&dst->page, - hugetlb_page_subpool(&src->page)); - hugetlb_set_page_subpool(&src->page, NULL); + if (hugetlb_folio_subpool(src)) { + hugetlb_set_folio_subpool(dst, + hugetlb_folio_subpool(src)); + hugetlb_set_folio_subpool(src, NULL); } if (mode != MIGRATE_SYNC_NO_COPY) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0c5326fd3c47a..02d9a8af3704e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -754,18 +754,29 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) +static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) +{ + return (void *)folio_get_private_1(folio); +} + /* * hugetlb page subpool pointer located in hpage[1].private */ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { - return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL); + return hugetlb_folio_subpool(page_folio(hpage)); +} + +static inline void hugetlb_set_folio_subpool(struct folio *folio, + struct hugepage_subpool *subpool) +{ + folio_set_private_1(folio, (unsigned long)subpool); } static inline void hugetlb_set_page_subpool(struct page *hpage, struct hugepage_subpool *subpool) { - set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool); + hugetlb_set_folio_subpool(page_folio(hpage), subpool); } static inline struct hstate *hstate_file(struct file *f) -- GitLab From 6b27a1f2533ad24406778cf6513fda55fba83c89 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 22 Sep 2022 10:42:07 -0500 Subject: [PATCH 2011/2290] mm/hugetlb: add folio_hstate() [ Upstream commit e51da3a9b6c2f67879880259a25c51dbda01c462 ] Helper function to retrieve hstate information from a hugetlb folio. Link: https://lkml.kernel.org/r/20220922154207.1575343-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reported-by: kernel test robot Reviewed-by: Mike Kravetz Cc: Arnd Bergmann Cc: Colin Cross Cc: David Howells Cc: "Eric W . Biederman" Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Muchun Song Cc: Peter Xu Cc: Vlastimil Babka Cc: William Kucharski Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- include/linux/hugetlb.h | 14 ++++++++++++-- mm/migrate.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 02d9a8af3704e..37eeef9841c4e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -863,10 +863,15 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif +static inline struct hstate *folio_hstate(struct folio *folio) +{ + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + return size_to_hstate(folio_size(folio)); +} + static inline struct hstate *page_hstate(struct page *page) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - return size_to_hstate(page_size(page)); + return folio_hstate(page_folio(page)); } static inline unsigned hstate_index_to_shift(unsigned index) @@ -1077,6 +1082,11 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma) return NULL; } +static inline struct hstate *folio_hstate(struct folio *folio) +{ + return NULL; +} + static inline struct hstate *page_hstate(struct page *page) { return NULL; diff --git a/mm/migrate.c b/mm/migrate.c index c5968021fde0a..0252aa4ff572e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1632,7 +1632,7 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) nid = folio_nid(folio); if (folio_test_hugetlb(folio)) { - struct hstate *h = page_hstate(&folio->page); + struct hstate *h = folio_hstate(folio); gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); -- GitLab From 8080591648f0052967c558ed98fb93f8d228df57 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:51 -0700 Subject: [PATCH 2012/2290] mm/hugetlb_cgroup: convert __set_hugetlb_cgroup() to folios [ Upstream commit a098c977722ca27d3b4bfeb966767af3cce45f85 ] Patch series "convert hugetlb_cgroup helper functions to folios", v2. This patch series continues the conversion of hugetlb code from being managed in pages to folios by converting many of the hugetlb_cgroup helper functions to use folios. This allows the core hugetlb functions to pass in a folio to these helper functions. This patch (of 9); Change __set_hugetlb_cgroup() to use folios so it is explicit that the function operates on a head page. Link: https://lkml.kernel.org/r/20221101223059.460937-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20221101223059.460937-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- include/linux/hugetlb_cgroup.h | 14 +++++++------- mm/hugetlb_cgroup.c | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 630cd255d0cfd..7576e9ed8afe7 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -90,31 +90,31 @@ hugetlb_cgroup_from_page_rsvd(struct page *page) return __hugetlb_cgroup_from_page(page, true); } -static inline void __set_hugetlb_cgroup(struct page *page, +static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { - VM_BUG_ON_PAGE(!PageHuge(page), page); + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) + if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return; if (rsvd) - set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD, + set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD), (unsigned long)h_cg); else - set_page_private(page + SUBPAGE_INDEX_CGROUP, + set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP), (unsigned long)h_cg); } static inline void set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page, h_cg, false); + __set_hugetlb_cgroup(page_folio(page), h_cg, false); } static inline void set_hugetlb_cgroup_rsvd(struct page *page, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page, h_cg, true); + __set_hugetlb_cgroup(page_folio(page), h_cg, true); } static inline bool hugetlb_cgroup_disabled(void) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f61d132df52b3..b2316bcbf634a 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -314,7 +314,7 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, if (hugetlb_cgroup_disabled() || !h_cg) return; - __set_hugetlb_cgroup(page, h_cg, rsvd); + __set_hugetlb_cgroup(page_folio(page), h_cg, rsvd); if (!rsvd) { unsigned long usage = h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; @@ -356,7 +356,7 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, h_cg = __hugetlb_cgroup_from_page(page, rsvd); if (unlikely(!h_cg)) return; - __set_hugetlb_cgroup(page, NULL, rsvd); + __set_hugetlb_cgroup(page_folio(page), NULL, rsvd); page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), -- GitLab From fc50e09b8be25ed05288a40a28d575c58e2a1ac9 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:52 -0700 Subject: [PATCH 2013/2290] mm/hugetlb_cgroup: convert hugetlb_cgroup_from_page() to folios [ Upstream commit f074732d599e19a2a5b12e54743ad5eaccbe6550 ] Introduce folios in __remove_hugetlb_page() by converting hugetlb_cgroup_from_page() to use folios. Also gets rid of unsed hugetlb_cgroup_from_page_resv() function. Link: https://lkml.kernel.org/r/20221101223059.460937-3-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mina Almasry Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- include/linux/hugetlb_cgroup.h | 39 +++++++++++++++++----------------- mm/hugetlb.c | 5 +++-- mm/hugetlb_cgroup.c | 13 +++++++----- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 7576e9ed8afe7..feb2edafc8b68 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -67,27 +67,34 @@ struct hugetlb_cgroup { }; static inline struct hugetlb_cgroup * -__hugetlb_cgroup_from_page(struct page *page, bool rsvd) +__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { - VM_BUG_ON_PAGE(!PageHuge(page), page); + struct page *tail; - if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - if (rsvd) - return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD); - else - return (void *)page_private(page + SUBPAGE_INDEX_CGROUP); + + if (rsvd) { + tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD); + return (void *)page_private(tail); + } + + else { + tail = folio_page(folio, SUBPAGE_INDEX_CGROUP); + return (void *)page_private(tail); + } } -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) { - return __hugetlb_cgroup_from_page(page, false); + return __hugetlb_cgroup_from_folio(folio, false); } static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_rsvd(struct page *page) +hugetlb_cgroup_from_folio_rsvd(struct folio *folio) { - return __hugetlb_cgroup_from_page(page, true); + return __hugetlb_cgroup_from_folio(folio, true); } static inline void __set_hugetlb_cgroup(struct folio *folio, @@ -181,19 +188,13 @@ static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, { } -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) -{ - return NULL; -} - -static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_resv(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) { return NULL; } static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_rsvd(struct page *page) +hugetlb_cgroup_from_folio_rsvd(struct folio *folio) { return NULL; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 37288a7f0fa65..9c1a30eb6c564 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1661,9 +1661,10 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, bool demote) { int nid = page_to_nid(page); + struct folio *folio = page_folio(page); - VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); - VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page); + VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio); + VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio); lockdep_assert_held(&hugetlb_lock); if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index b2316bcbf634a..8b95c1560f9c3 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -191,8 +191,9 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, struct page_counter *counter; struct hugetlb_cgroup *page_hcg; struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); + struct folio *folio = page_folio(page); - page_hcg = hugetlb_cgroup_from_page(page); + page_hcg = hugetlb_cgroup_from_folio(folio); /* * We can have pages in active list without any cgroup * ie, hugepage with less than 3 pages. We can safely @@ -349,14 +350,15 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page, bool rsvd) { struct hugetlb_cgroup *h_cg; + struct folio *folio = page_folio(page); if (hugetlb_cgroup_disabled()) return; lockdep_assert_held(&hugetlb_lock); - h_cg = __hugetlb_cgroup_from_page(page, rsvd); + h_cg = __hugetlb_cgroup_from_folio(folio, rsvd); if (unlikely(!h_cg)) return; - __set_hugetlb_cgroup(page_folio(page), NULL, rsvd); + __set_hugetlb_cgroup(folio, NULL, rsvd); page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), @@ -888,13 +890,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) struct hugetlb_cgroup *h_cg; struct hugetlb_cgroup *h_cg_rsvd; struct hstate *h = page_hstate(oldhpage); + struct folio *old_folio = page_folio(oldhpage); if (hugetlb_cgroup_disabled()) return; spin_lock_irq(&hugetlb_lock); - h_cg = hugetlb_cgroup_from_page(oldhpage); - h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); + h_cg = hugetlb_cgroup_from_folio(old_folio); + h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio); set_hugetlb_cgroup(oldhpage, NULL); set_hugetlb_cgroup_rsvd(oldhpage, NULL); -- GitLab From 10de76f4cdac91a482177414ebb445f654af5da7 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:56 -0700 Subject: [PATCH 2014/2290] mm/hugetlb: convert free_huge_page to folios [ Upstream commit 0356c4b96f6890dd61af4c902f681764f4bdba09 ] Use folios inside free_huge_page(), this is in preparation for converting hugetlb_cgroup_uncharge_page() to take in a folio. Link: https://lkml.kernel.org/r/20221101223059.460937-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- mm/hugetlb.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9c1a30eb6c564..6cdbb06902df1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1918,21 +1918,22 @@ void free_huge_page(struct page *page) * Can't pass hstate in here because it is called from the * compound page destructor. */ - struct hstate *h = page_hstate(page); - int nid = page_to_nid(page); - struct hugepage_subpool *spool = hugetlb_page_subpool(page); + struct folio *folio = page_folio(page); + struct hstate *h = folio_hstate(folio); + int nid = folio_nid(folio); + struct hugepage_subpool *spool = hugetlb_folio_subpool(folio); bool restore_reserve; unsigned long flags; - VM_BUG_ON_PAGE(page_count(page), page); - VM_BUG_ON_PAGE(page_mapcount(page), page); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); + VM_BUG_ON_FOLIO(folio_mapcount(folio), folio); - hugetlb_set_page_subpool(page, NULL); - if (PageAnon(page)) - __ClearPageAnonExclusive(page); - page->mapping = NULL; - restore_reserve = HPageRestoreReserve(page); - ClearHPageRestoreReserve(page); + hugetlb_set_folio_subpool(folio, NULL); + if (folio_test_anon(folio)) + __ClearPageAnonExclusive(&folio->page); + folio->mapping = NULL; + restore_reserve = folio_test_hugetlb_restore_reserve(folio); + folio_clear_hugetlb_restore_reserve(folio); /* * If HPageRestoreReserve was set on page, page allocation consumed a @@ -1954,7 +1955,7 @@ void free_huge_page(struct page *page) } spin_lock_irqsave(&hugetlb_lock, flags); - ClearHPageMigratable(page); + folio_clear_hugetlb_migratable(folio); hugetlb_cgroup_uncharge_page(hstate_index(h), pages_per_huge_page(h), page); hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), @@ -1962,7 +1963,7 @@ void free_huge_page(struct page *page) if (restore_reserve) h->resv_huge_pages++; - if (HPageTemporary(page)) { + if (folio_test_hugetlb_temporary(folio)) { remove_hugetlb_page(h, page, false); spin_unlock_irqrestore(&hugetlb_lock, flags); update_and_free_page(h, page, true); -- GitLab From cc8f0d90ba485e4ff052ce6d6f1ecea34a475348 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 1 Nov 2022 15:30:57 -0700 Subject: [PATCH 2015/2290] mm/hugetlb_cgroup: convert hugetlb_cgroup_uncharge_page() to folios [ Upstream commit d4ab0316cc33aeedf6dcb1c2c25e097a25766132 ] Continue to use a folio inside free_huge_page() by converting hugetlb_cgroup_uncharge_page*() to folios. Link: https://lkml.kernel.org/r/20221101223059.460937-8-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Aneesh Kumar K.V Cc: Bui Quang Minh Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Mina Almasry Signed-off-by: Andrew Morton Stable-dep-of: b76b46902c2d ("mm/hugetlb: fix missing hugetlb_lock for resv uncharge") Signed-off-by: Sasha Levin --- include/linux/hugetlb_cgroup.h | 16 ++++++++-------- mm/hugetlb.c | 15 +++++++++------ mm/hugetlb_cgroup.c | 21 ++++++++++----------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index feb2edafc8b68..241bf4fe701ae 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -158,10 +158,10 @@ extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page); -extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page); -extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, - struct page *page); +extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio); +extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, + struct folio *folio); extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); @@ -254,14 +254,14 @@ hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, { } -static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page) +static inline void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio) { } -static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx, +static inline void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, - struct page *page) + struct folio *folio) { } static inline void hugetlb_cgroup_uncharge_cgroup(int idx, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6cdbb06902df1..e720b9ac28337 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1956,10 +1956,10 @@ void free_huge_page(struct page *page) spin_lock_irqsave(&hugetlb_lock, flags); folio_clear_hugetlb_migratable(folio); - hugetlb_cgroup_uncharge_page(hstate_index(h), - pages_per_huge_page(h), page); - hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), - pages_per_huge_page(h), page); + hugetlb_cgroup_uncharge_folio(hstate_index(h), + pages_per_huge_page(h), folio); + hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), + pages_per_huge_page(h), folio); if (restore_reserve) h->resv_huge_pages++; @@ -3082,6 +3082,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct page *page; + struct folio *folio; long map_chg, map_commit; long gbl_chg; int ret, idx; @@ -3145,6 +3146,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, * a reservation exists for the allocation. */ page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg); + if (!page) { spin_unlock_irq(&hugetlb_lock); page = alloc_buddy_huge_page_with_mpol(h, vma, addr); @@ -3159,6 +3161,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, set_page_refcounted(page); /* Fall through */ } + folio = page_folio(page); hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page); /* If allocation is not consuming a reservation, also store the * hugetlb_cgroup pointer on the page. @@ -3188,8 +3191,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); if (deferred_reserve) - hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), - pages_per_huge_page(h), page); + hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), + pages_per_huge_page(h), folio); } return page; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 8b95c1560f9c3..32f4408eda240 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -346,11 +346,10 @@ void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, /* * Should be called with hugetlb_lock held */ -static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page, bool rsvd) +static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio, bool rsvd) { struct hugetlb_cgroup *h_cg; - struct folio *folio = page_folio(page); if (hugetlb_cgroup_disabled()) return; @@ -368,27 +367,27 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, css_put(&h_cg->css); else { unsigned long usage = - h_cg->nodeinfo[page_to_nid(page)]->usage[idx]; + h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; /* * This write is not atomic due to fetching usage and writing * to it, but that's fine because we call this with * hugetlb_lock held anyway. */ - WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx], + WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], usage - nr_pages); } } -void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page) +void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio) { - __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); + __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false); } -void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, - struct page *page) +void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, + struct folio *folio) { - __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); + __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true); } static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, -- GitLab From 4c806333efea1000a2a9620926f560ad2e1ca7cc Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 Apr 2024 17:18:35 -0400 Subject: [PATCH 2016/2290] mm/hugetlb: fix missing hugetlb_lock for resv uncharge [ Upstream commit b76b46902c2d0395488c8412e1116c2486cdfcb2 ] There is a recent report on UFFDIO_COPY over hugetlb: https://lore.kernel.org/all/000000000000ee06de0616177560@google.com/ 350: lockdep_assert_held(&hugetlb_lock); Should be an issue in hugetlb but triggered in an userfault context, where it goes into the unlikely path where two threads modifying the resv map together. Mike has a fix in that path for resv uncharge but it looks like the locking criteria was overlooked: hugetlb_cgroup_uncharge_folio_rsvd() will update the cgroup pointer, so it requires to be called with the lock held. Link: https://lkml.kernel.org/r/20240417211836.2742593-3-peterx@redhat.com Fixes: 79aa925bf239 ("hugetlb_cgroup: fix reservation accounting") Signed-off-by: Peter Xu Reported-by: syzbot+4b8077a5fccc61c385a1@syzkaller.appspotmail.com Reviewed-by: Mina Almasry Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/hugetlb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e720b9ac28337..e9ae0fc81dfbe 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3190,9 +3190,12 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); - if (deferred_reserve) + if (deferred_reserve) { + spin_lock_irq(&hugetlb_lock); hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), pages_per_huge_page(h), folio); + spin_unlock_irq(&hugetlb_lock); + } } return page; -- GitLab From d73ba54dad9236f7f0304caebe5288e6ecaf4008 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 18:18:14 +0900 Subject: [PATCH 2017/2290] kbuild: refactor host*_flags [ Upstream commit 16169a47d5c36046041527faafb5a3f5c86701c6 ] Remove _host*_flags. No functional change is intended. Signed-off-by: Masahiro Yamada Reviewed-by: Miguel Ojeda Tested-by: Miguel Ojeda Stable-dep-of: ded103c7eb23 ("kbuild: rust: force `alloc` extern to allow "empty" Rust files") Signed-off-by: Sasha Levin --- scripts/Makefile.host | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index da133780b7518..4a02b31cd1029 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -80,25 +80,23 @@ host-rust := $(addprefix $(obj)/,$(host-rust)) ##### # Handle options to gcc. Support building with separate output directory -_hostc_flags = $(KBUILD_HOSTCFLAGS) $(HOST_EXTRACFLAGS) \ +hostc_flags = -Wp,-MMD,$(depfile) \ + $(KBUILD_HOSTCFLAGS) $(HOST_EXTRACFLAGS) \ $(HOSTCFLAGS_$(target-stem).o) -_hostcxx_flags = $(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \ +hostcxx_flags = -Wp,-MMD,$(depfile) \ + $(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \ $(HOSTCXXFLAGS_$(target-stem).o) -_hostrust_flags = $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \ - $(HOSTRUSTFLAGS_$(target-stem)) +hostrust_flags = $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \ + $(HOSTRUSTFLAGS_$(target-stem)) # $(objtree)/$(obj) for including generated headers from checkin source files ifeq ($(KBUILD_EXTMOD),) ifdef building_out_of_srctree -_hostc_flags += -I $(objtree)/$(obj) -_hostcxx_flags += -I $(objtree)/$(obj) +hostc_flags += -I $(objtree)/$(obj) +hostcxx_flags += -I $(objtree)/$(obj) endif endif -hostc_flags = -Wp,-MMD,$(depfile) $(_hostc_flags) -hostcxx_flags = -Wp,-MMD,$(depfile) $(_hostcxx_flags) -hostrust_flags = $(_hostrust_flags) - ##### # Compile programs on the host -- GitLab From fe73628b304172d65fefb888c13ab6f54cb54692 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 18:18:15 +0900 Subject: [PATCH 2018/2290] kbuild: specify output names separately for each emission type from rustc [ Upstream commit 295d8398c67e314d99bb070f38883f83fe94a97a ] In Kbuild, two different rules must not write to the same file, but it happens when compiling rust source files. For example, set CONFIG_SAMPLE_RUST_MINIMAL=m and run the following: $ make -j$(nproc) samples/rust/rust_minimal.o samples/rust/rust_minimal.rsi \ samples/rust/rust_minimal.s samples/rust/rust_minimal.ll [snip] RUSTC [M] samples/rust/rust_minimal.o RUSTC [M] samples/rust/rust_minimal.rsi RUSTC [M] samples/rust/rust_minimal.s RUSTC [M] samples/rust/rust_minimal.ll mv: cannot stat 'samples/rust/rust_minimal.d': No such file or directory make[3]: *** [scripts/Makefile.build:334: samples/rust/rust_minimal.ll] Error 1 make[3]: *** Waiting for unfinished jobs.... mv: cannot stat 'samples/rust/rust_minimal.d': No such file or directory make[3]: *** [scripts/Makefile.build:309: samples/rust/rust_minimal.o] Error 1 mv: cannot stat 'samples/rust/rust_minimal.d': No such file or directory make[3]: *** [scripts/Makefile.build:326: samples/rust/rust_minimal.s] Error 1 make[2]: *** [scripts/Makefile.build:504: samples/rust] Error 2 make[1]: *** [scripts/Makefile.build:504: samples] Error 2 make: *** [Makefile:2008: .] Error 2 The reason for the error is that 4 threads running in parallel renames the same file, samples/rust/rust_minimal.d. This does not happen when compiling C or assembly files because -Wp,-MMD,$(depfile) explicitly specifies the dependency filepath. $(depfile) is a unique path for each target. Currently, rustc is only given --out-dir and --emit= So, all the rust build rules output the dep-info into the default .d, which causes the path conflict. Fortunately, the --emit option is able to specify the output path individually, with the form --emit==. Add --emit=dep-info=$(depfile) to the common part. Also, remove the redundant --out-dir because the output path is specified for each type. The code gets much cleaner because we do not need to rename *.d files. Signed-off-by: Masahiro Yamada Reviewed-by: Miguel Ojeda Tested-by: Miguel Ojeda Reviewed-by: Vincenzo Palazzo Stable-dep-of: ded103c7eb23 ("kbuild: rust: force `alloc` extern to allow "empty" Rust files") Signed-off-by: Sasha Levin --- rust/Makefile | 11 +++++------ scripts/Makefile.build | 14 +++++++------- scripts/Makefile.host | 7 +++---- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index 7700d3853404e..6d0c0e9757f21 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -322,10 +322,9 @@ $(obj)/exports_kernel_generated.h: $(obj)/kernel.o FORCE quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ cmd_rustc_procmacro = \ $(RUSTC_OR_CLIPPY) $(rust_common_flags) \ - --emit=dep-info,link --extern proc_macro \ - --crate-type proc-macro --out-dir $(objtree)/$(obj) \ + --emit=dep-info=$(depfile) --emit=link=$@ --extern proc_macro \ + --crate-type proc-macro \ --crate-name $(patsubst lib%.so,%,$(notdir $@)) $<; \ - mv $(objtree)/$(obj)/$(patsubst lib%.so,%,$(notdir $@)).d $(depfile); \ sed -i '/^\#/d' $(depfile) # Procedural macros can only be used with the `rustc` that compiled it. @@ -339,10 +338,10 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L OBJTREE=$(abspath $(objtree)) \ $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ $(filter-out $(skip_flags),$(rust_flags) $(rustc_target_flags)) \ - --emit=dep-info,obj,metadata --crate-type rlib \ - --out-dir $(objtree)/$(obj) -L$(objtree)/$(obj) \ + --emit=dep-info=$(depfile) --emit=obj=$@ \ + --emit=metadata=$(dir $@)$(patsubst %.o,lib%.rmeta,$(notdir $@)) \ + --crate-type rlib -L$(objtree)/$(obj) \ --crate-name $(patsubst %.o,%,$(notdir $@)) $<; \ - mv $(objtree)/$(obj)/$(patsubst %.o,%,$(notdir $@)).d $(depfile); \ sed -i '/^\#/d' $(depfile) \ $(if $(rustc_objcopy),;$(OBJCOPY) $(rustc_objcopy) $@) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 41f3602fc8de7..9ae02542b9389 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -283,11 +283,11 @@ rust_common_cmd = \ -Zcrate-attr=no_std \ -Zcrate-attr='feature($(rust_allowed_features))' \ --extern alloc --extern kernel \ - --crate-type rlib --out-dir $(obj) -L $(objtree)/rust/ \ - --crate-name $(basename $(notdir $@)) + --crate-type rlib -L $(objtree)/rust/ \ + --crate-name $(basename $(notdir $@)) \ + --emit=dep-info=$(depfile) rust_handle_depfile = \ - mv $(obj)/$(basename $(notdir $@)).d $(depfile); \ sed -i '/^\#/d' $(depfile) # `--emit=obj`, `--emit=asm` and `--emit=llvm-ir` imply a single codegen unit @@ -300,7 +300,7 @@ rust_handle_depfile = \ quiet_cmd_rustc_o_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ cmd_rustc_o_rs = \ - $(rust_common_cmd) --emit=dep-info,obj $<; \ + $(rust_common_cmd) --emit=obj=$@ $<; \ $(rust_handle_depfile) $(obj)/%.o: $(src)/%.rs FORCE @@ -308,7 +308,7 @@ $(obj)/%.o: $(src)/%.rs FORCE quiet_cmd_rustc_rsi_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ cmd_rustc_rsi_rs = \ - $(rust_common_cmd) --emit=dep-info -Zunpretty=expanded $< >$@; \ + $(rust_common_cmd) -Zunpretty=expanded $< >$@; \ command -v $(RUSTFMT) >/dev/null && $(RUSTFMT) $@; \ $(rust_handle_depfile) @@ -317,7 +317,7 @@ $(obj)/%.rsi: $(src)/%.rs FORCE quiet_cmd_rustc_s_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ cmd_rustc_s_rs = \ - $(rust_common_cmd) --emit=dep-info,asm $<; \ + $(rust_common_cmd) --emit=asm=$@ $<; \ $(rust_handle_depfile) $(obj)/%.s: $(src)/%.rs FORCE @@ -325,7 +325,7 @@ $(obj)/%.s: $(src)/%.rs FORCE quiet_cmd_rustc_ll_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ cmd_rustc_ll_rs = \ - $(rust_common_cmd) --emit=dep-info,llvm-ir $<; \ + $(rust_common_cmd) --emit=llvm-ir=$@ $<; \ $(rust_handle_depfile) $(obj)/%.ll: $(src)/%.rs FORCE diff --git a/scripts/Makefile.host b/scripts/Makefile.host index 4a02b31cd1029..d812241144d44 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -86,7 +86,8 @@ hostc_flags = -Wp,-MMD,$(depfile) \ hostcxx_flags = -Wp,-MMD,$(depfile) \ $(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \ $(HOSTCXXFLAGS_$(target-stem).o) -hostrust_flags = $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \ +hostrust_flags = --emit=dep-info=$(depfile) \ + $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \ $(HOSTRUSTFLAGS_$(target-stem)) # $(objtree)/$(obj) for including generated headers from checkin source files @@ -147,9 +148,7 @@ $(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE # host-rust -> Executable quiet_cmd_host-rust = HOSTRUSTC $@ cmd_host-rust = \ - $(HOSTRUSTC) $(hostrust_flags) --emit=dep-info,link \ - --out-dir=$(obj)/ $<; \ - mv $(obj)/$(target-stem).d $(depfile); \ + $(HOSTRUSTC) $(hostrust_flags) --emit=link=$@ $<; \ sed -i '/^\#/d' $(depfile) $(host-rust): $(obj)/%: $(src)/%.rs FORCE $(call if_changed_dep,host-rust) -- GitLab From ff03a8b422039456454a736dc1fb9da34045884d Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 19 Dec 2022 05:40:44 +0000 Subject: [PATCH 2019/2290] cifs: use the least loaded channel for sending requests [ Upstream commit ea90708d3cf3d0d92c02afe445ad463fb3c6bf10 ] Till now, we've used a simple round robin approach to distribute the requests between the channels. This does not work well if the channels consume the requests at different speeds, even if the advertised speeds are the same. This change will allow the client to pick the channel with least number of requests currently in-flight. This will disregard the link speed, and select a channel based on the current load of the channels. For cases when all the channels are equally loaded, fall back to the old round robin method. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Stable-dep-of: 8094a600245e ("smb3: missing lock when picking channel") Signed-off-by: Sasha Levin --- fs/smb/client/transport.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 338b34c99b2de..da2bef3b7ac27 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1045,15 +1045,40 @@ cifs_cancelled_callback(struct mid_q_entry *mid) struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) { uint index = 0; + unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; + struct TCP_Server_Info *server = NULL; + int i; if (!ses) return NULL; - /* round robin */ - index = (uint)atomic_inc_return(&ses->chan_seq); - spin_lock(&ses->chan_lock); - index %= ses->chan_count; + for (i = 0; i < ses->chan_count; i++) { + server = ses->chans[i].server; + if (!server) + continue; + + /* + * strictly speaking, we should pick up req_lock to read + * server->in_flight. But it shouldn't matter much here if we + * race while reading this data. The worst that can happen is + * that we could use a channel that's not least loaded. Avoiding + * taking the lock could help reduce wait time, which is + * important for this function + */ + if (server->in_flight < min_in_flight) { + min_in_flight = server->in_flight; + index = i; + } + if (server->in_flight > max_in_flight) + max_in_flight = server->in_flight; + } + + /* if all channels are equally loaded, fall back to round-robin */ + if (min_in_flight == max_in_flight) { + index = (uint)atomic_inc_return(&ses->chan_seq); + index %= ses->chan_count; + } spin_unlock(&ses->chan_lock); return ses->chans[index].server; -- GitLab From 98c7ed29cd754ae7475dc7cb3f33399fda902729 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 11:30:16 -0500 Subject: [PATCH 2020/2290] smb3: missing lock when picking channel [ Upstream commit 8094a600245e9b28eb36a13036f202ad67c1f887 ] Coverity spotted a place where we should have been holding the channel lock when accessing the ses channel index. Addresses-Coverity: 1582039 ("Data race condition (MISSING_LOCK)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index da2bef3b7ac27..3fdafb9297f13 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1079,9 +1079,11 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) index = (uint)atomic_inc_return(&ses->chan_seq); index %= ses->chan_count; } + + server = ses->chans[index].server; spin_unlock(&ses->chan_lock); - return ses->chans[index].server; + return server; } int -- GitLab From ee2b22d38826454e260e8df1fb49863611c2da2f Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Wed, 13 Mar 2024 17:28:09 +0800 Subject: [PATCH 2021/2290] pinctrl: pinctrl-aspeed-g6: Fix register offset for pinconf of GPIOR-T [ Upstream commit c10cd03d69403fa0f00be8631bd4cb4690440ebd ] The register offset to disable the internal pull-down of GPIOR~T is 0x630 instead of 0x620, as specified in the Ast2600 datasheet v15 The datasheet can download from the official Aspeed website. Fixes: 15711ba6ff19 ("pinctrl: aspeed-g6: Add AST2600 pinconf support") Reported-by: Delphine CC Chiu Signed-off-by: Billy Tsai Reviewed-by: Paul Menzel Reviewed-by: Andrew Jeffery Message-ID: <20240313092809.2596644-1-billy_tsai@aspeedtech.com> Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 80838dc54b3ab..7938741136a2c 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -43,7 +43,7 @@ #define SCU614 0x614 /* Disable GPIO Internal Pull-Down #1 */ #define SCU618 0x618 /* Disable GPIO Internal Pull-Down #2 */ #define SCU61C 0x61c /* Disable GPIO Internal Pull-Down #3 */ -#define SCU620 0x620 /* Disable GPIO Internal Pull-Down #4 */ +#define SCU630 0x630 /* Disable GPIO Internal Pull-Down #4 */ #define SCU634 0x634 /* Disable GPIO Internal Pull-Down #5 */ #define SCU638 0x638 /* Disable GPIO Internal Pull-Down #6 */ #define SCU690 0x690 /* Multi-function Pin Control #24 */ @@ -2494,38 +2494,38 @@ static struct aspeed_pin_config aspeed_g6_configs[] = { ASPEED_PULL_DOWN_PINCONF(D14, SCU61C, 0), /* GPIOS7 */ - ASPEED_PULL_DOWN_PINCONF(T24, SCU620, 23), + ASPEED_PULL_DOWN_PINCONF(T24, SCU630, 23), /* GPIOS6 */ - ASPEED_PULL_DOWN_PINCONF(P23, SCU620, 22), + ASPEED_PULL_DOWN_PINCONF(P23, SCU630, 22), /* GPIOS5 */ - ASPEED_PULL_DOWN_PINCONF(P24, SCU620, 21), + ASPEED_PULL_DOWN_PINCONF(P24, SCU630, 21), /* GPIOS4 */ - ASPEED_PULL_DOWN_PINCONF(R26, SCU620, 20), + ASPEED_PULL_DOWN_PINCONF(R26, SCU630, 20), /* GPIOS3*/ - ASPEED_PULL_DOWN_PINCONF(R24, SCU620, 19), + ASPEED_PULL_DOWN_PINCONF(R24, SCU630, 19), /* GPIOS2 */ - ASPEED_PULL_DOWN_PINCONF(T26, SCU620, 18), + ASPEED_PULL_DOWN_PINCONF(T26, SCU630, 18), /* GPIOS1 */ - ASPEED_PULL_DOWN_PINCONF(T25, SCU620, 17), + ASPEED_PULL_DOWN_PINCONF(T25, SCU630, 17), /* GPIOS0 */ - ASPEED_PULL_DOWN_PINCONF(R23, SCU620, 16), + ASPEED_PULL_DOWN_PINCONF(R23, SCU630, 16), /* GPIOR7 */ - ASPEED_PULL_DOWN_PINCONF(U26, SCU620, 15), + ASPEED_PULL_DOWN_PINCONF(U26, SCU630, 15), /* GPIOR6 */ - ASPEED_PULL_DOWN_PINCONF(W26, SCU620, 14), + ASPEED_PULL_DOWN_PINCONF(W26, SCU630, 14), /* GPIOR5 */ - ASPEED_PULL_DOWN_PINCONF(T23, SCU620, 13), + ASPEED_PULL_DOWN_PINCONF(T23, SCU630, 13), /* GPIOR4 */ - ASPEED_PULL_DOWN_PINCONF(U25, SCU620, 12), + ASPEED_PULL_DOWN_PINCONF(U25, SCU630, 12), /* GPIOR3*/ - ASPEED_PULL_DOWN_PINCONF(V26, SCU620, 11), + ASPEED_PULL_DOWN_PINCONF(V26, SCU630, 11), /* GPIOR2 */ - ASPEED_PULL_DOWN_PINCONF(V24, SCU620, 10), + ASPEED_PULL_DOWN_PINCONF(V24, SCU630, 10), /* GPIOR1 */ - ASPEED_PULL_DOWN_PINCONF(U24, SCU620, 9), + ASPEED_PULL_DOWN_PINCONF(U24, SCU630, 9), /* GPIOR0 */ - ASPEED_PULL_DOWN_PINCONF(V25, SCU620, 8), + ASPEED_PULL_DOWN_PINCONF(V25, SCU630, 8), /* GPIOX7 */ ASPEED_PULL_DOWN_PINCONF(AB10, SCU634, 31), -- GitLab From 8d93303fd026fbdbb8c0b9c0cdddc807e83cf911 Mon Sep 17 00:00:00 2001 From: Jan Dakinevich Date: Mon, 25 Mar 2024 14:30:58 +0300 Subject: [PATCH 2022/2290] pinctrl/meson: fix typo in PDM's pin name [ Upstream commit 368a90e651faeeb7049a876599cf2b0d74954796 ] Other pins have _a or _x suffix, but this one doesn't have any. Most likely this is a typo. Fixes: dabad1ff8561 ("pinctrl: meson: add pinctrl driver support for Meson-A1 SoC") Signed-off-by: Jan Dakinevich Reviewed-by: Neil Armstrong Message-ID: <20240325113058.248022-1-jan.dakinevich@salutedevices.com> Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson-a1.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-a1.c b/drivers/pinctrl/meson/pinctrl-meson-a1.c index 79f5d753d7e1a..50a87d9618a8e 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-a1.c +++ b/drivers/pinctrl/meson/pinctrl-meson-a1.c @@ -250,7 +250,7 @@ static const unsigned int pdm_dclk_x_pins[] = { GPIOX_10 }; static const unsigned int pdm_din2_a_pins[] = { GPIOA_6 }; static const unsigned int pdm_din1_a_pins[] = { GPIOA_7 }; static const unsigned int pdm_din0_a_pins[] = { GPIOA_8 }; -static const unsigned int pdm_dclk_pins[] = { GPIOA_9 }; +static const unsigned int pdm_dclk_a_pins[] = { GPIOA_9 }; /* gen_clk */ static const unsigned int gen_clk_x_pins[] = { GPIOX_7 }; @@ -591,7 +591,7 @@ static struct meson_pmx_group meson_a1_periphs_groups[] = { GROUP(pdm_din2_a, 3), GROUP(pdm_din1_a, 3), GROUP(pdm_din0_a, 3), - GROUP(pdm_dclk, 3), + GROUP(pdm_dclk_a, 3), GROUP(pwm_c_a, 3), GROUP(pwm_b_a, 3), @@ -755,7 +755,7 @@ static const char * const spi_a_groups[] = { static const char * const pdm_groups[] = { "pdm_din0_x", "pdm_din1_x", "pdm_din2_x", "pdm_dclk_x", "pdm_din2_a", - "pdm_din1_a", "pdm_din0_a", "pdm_dclk", + "pdm_din1_a", "pdm_din0_a", "pdm_dclk_a", }; static const char * const gen_clk_groups[] = { -- GitLab From ac7d65795827dc0cf7662384ed27caf4066bd72e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Mar 2024 09:38:39 +0300 Subject: [PATCH 2023/2290] pinctrl: core: delete incorrect free in pinctrl_enable() [ Upstream commit 5038a66dad0199de60e5671603ea6623eb9e5c79 ] The "pctldev" struct is allocated in devm_pinctrl_register_and_init(). It's a devm_ managed pointer that is freed by devm_pinctrl_dev_release(), so freeing it in pinctrl_enable() will lead to a double free. The devm_pinctrl_dev_release() function frees the pindescs and destroys the mutex as well. Fixes: 6118714275f0 ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()") Signed-off-by: Dan Carpenter Message-ID: <578fbe56-44e9-487c-ae95-29b695650f7c@moroto.mountain> Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index f1962866bb814..1ef36a0a7dd20 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2098,13 +2098,7 @@ int pinctrl_enable(struct pinctrl_dev *pctldev) error = pinctrl_claim_hogs(pctldev); if (error) { - dev_err(pctldev->dev, "could not claim hogs: %i\n", - error); - pinctrl_free_pindescs(pctldev, pctldev->desc->pins, - pctldev->desc->npins); - mutex_destroy(&pctldev->mutex); - kfree(pctldev); - + dev_err(pctldev->dev, "could not claim hogs: %i\n", error); return error; } -- GitLab From 4880cc2233ca44107cf84d6be4c51e2a4eec432c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 27 Mar 2024 17:13:33 +0800 Subject: [PATCH 2024/2290] pinctrl: mediatek: paris: Fix PIN_CONFIG_INPUT_SCHMITT_ENABLE readback [ Upstream commit 08f66a8edd08f6f7cfa769c81634b29a2b123908 ] In the generic pin config library, readback of some options are handled differently compared to the setting of those options: the argument value is used to convey enable/disable of an option in the set path, but success or -EINVAL is used to convey if an option is enabled or disabled in the debugfs readback path. PIN_CONFIG_INPUT_SCHMITT_ENABLE is one such option. Fix the readback of the option in the mediatek-paris library, so that the debugfs dump is not showing "input schmitt enabled" for pins that don't have it enabled. Fixes: 1bea6afbc842 ("pinctrl: mediatek: Refine mtk_pinconf_get()") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Message-ID: <20240327091336.3434141-2-wenst@chromium.org> Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-paris.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index ad873bd051b68..3f2297ee2b765 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -188,6 +188,8 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, } err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_SMT, &ret); + if (!ret) + err = -EINVAL; break; case PIN_CONFIG_DRIVE_STRENGTH: if (!hw->soc->drive_get) -- GitLab From 8a6c8f2876409504d7b26bf43ff143fb86d70d0b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 27 Mar 2024 17:13:34 +0800 Subject: [PATCH 2025/2290] pinctrl: mediatek: paris: Rework support for PIN_CONFIG_{INPUT,OUTPUT}_ENABLE [ Upstream commit c5d3b64c568a344e998830e0e94a7c04e372f89b ] There is a misinterpretation of some of the PIN_CONFIG_* options in this driver library. PIN_CONFIG_OUTPUT_ENABLE should refer to a buffer or switch in the output direction of the electrical path. The MediaTek hardware does not have such a thing. The driver incorrectly maps this option to the GPIO function's direction. Likewise, PIN_CONFIG_INPUT_ENABLE should refer to a buffer or switch in the input direction. The hardware does have such a mechanism, and is mapped to the IES bit. The driver however sets the direction in addition to the IES bit, which is incorrect. On readback, the IES bit isn't even considered. Ironically, the driver does not support readback for PIN_CONFIG_OUTPUT, while its readback of PIN_CONFIG_{INPUT,OUTPUT}_ENABLE is what it should be doing for PIN_CONFIG_OUTPUT. Rework support for these three options, so that PIN_CONFIG_OUTPUT_ENABLE is completely removed, PIN_CONFIG_INPUT_ENABLE is only linked to the IES bit, and PIN_CONFIG_OUTPUT is linked to the GPIO function's direction and output level. Fixes: 805250982bb5 ("pinctrl: mediatek: add pinctrl-paris that implements the vendor dt-bindings") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Message-ID: <20240327091336.3434141-3-wenst@chromium.org> Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-paris.c | 38 +++++++----------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index 3f2297ee2b765..ee72c6894a5d7 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -160,20 +160,21 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_SR, &ret); break; case PIN_CONFIG_INPUT_ENABLE: - case PIN_CONFIG_OUTPUT_ENABLE: + err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_IES, &ret); + if (!ret) + err = -EINVAL; + break; + case PIN_CONFIG_OUTPUT: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &ret); if (err) break; - /* CONFIG Current direction return value - * ------------- ----------------- ---------------------- - * OUTPUT_ENABLE output 1 (= HW value) - * input 0 (= HW value) - * INPUT_ENABLE output 0 (= reverse HW value) - * input 1 (= reverse HW value) - */ - if (param == PIN_CONFIG_INPUT_ENABLE) - ret = !ret; + if (!ret) { + err = -EINVAL; + break; + } + + err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DO, &ret); break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &ret); @@ -278,26 +279,9 @@ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; err = hw->soc->bias_set_combo(hw, desc, 0, arg); break; - case PIN_CONFIG_OUTPUT_ENABLE: - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_SMT, - MTK_DISABLE); - /* Keep set direction to consider the case that a GPIO pin - * does not have SMT control - */ - if (err != -ENOTSUPP) - break; - - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, - MTK_OUTPUT); - break; case PIN_CONFIG_INPUT_ENABLE: /* regard all non-zero value as enable */ err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_IES, !!arg); - if (err) - break; - - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, - MTK_INPUT); break; case PIN_CONFIG_SLEW_RATE: /* regard all non-zero value as enable */ -- GitLab From 5720cd526404f21ec8e19034836d6fa7dc7da062 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 15 Feb 2024 14:57:30 -0500 Subject: [PATCH 2026/2290] sunrpc: add a struct rpc_stats arg to rpc_create_args [ Upstream commit 2057a48d0dd00c6a2a94ded7df2bf1d3f2a4a0da ] We want to be able to have our rpc stats handled in a per network namespace manner, so add an option to rpc_create_args to specify a different rpc_stats struct instead of using the one on the rpc_program. Signed-off-by: Josef Bacik Signed-off-by: Trond Myklebust Stable-dep-of: 24457f1be29f ("nfs: Handle error of rpc_proc_register() in nfs_net_init().") Signed-off-by: Sasha Levin --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c794b0ce4e782..809c23120d548 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -131,6 +131,7 @@ struct rpc_create_args { const char *servername; const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 61e5c77462e94..b774028e4aa8f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -398,7 +398,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; - clnt->cl_stats = program->stats; + clnt->cl_stats = args->stats ? : program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects); err = -ENOMEM; @@ -677,6 +677,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) .version = clnt->cl_vers, .authflavor = clnt->cl_auth->au_flavor, .cred = clnt->cl_cred, + .stats = clnt->cl_stats, }; return __rpc_clone_client(&args, clnt); } @@ -699,6 +700,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) .version = clnt->cl_vers, .authflavor = flavor, .cred = clnt->cl_cred, + .stats = clnt->cl_stats, }; return __rpc_clone_client(&args, clnt); } @@ -1044,6 +1046,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .version = vers, .authflavor = old->cl_auth->au_flavor, .cred = old->cl_cred, + .stats = old->cl_stats, }; struct rpc_clnt *clnt; int err; -- GitLab From 9dd86e9d34b1078dcd647220e96a205028bf4e6f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 15 Feb 2024 14:57:31 -0500 Subject: [PATCH 2027/2290] nfs: expose /proc/net/sunrpc/nfs in net namespaces [ Upstream commit d47151b79e3220e72ae323b8b8e9d6da20dc884e ] We're using nfs mounts inside of containers in production and noticed that the nfs stats are not exposed in /proc. This is a problem for us as we use these stats for monitoring, and have to do this awkward bind mount from the main host into the container in order to get to these states. Add the rpc_proc_register call to the pernet operations entry and exit points so these stats can be exposed inside of network namespaces. Signed-off-by: Josef Bacik Signed-off-by: Trond Myklebust Stable-dep-of: 24457f1be29f ("nfs: Handle error of rpc_proc_register() in nfs_net_init().") Signed-off-by: Sasha Levin --- fs/nfs/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e0c1fb98f907a..3cdb8daa6d6a9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2334,11 +2334,13 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { nfs_clients_init(net); + rpc_proc_register(net, &nfs_rpcstat); return nfs_fs_proc_net_init(net); } static void nfs_net_exit(struct net *net) { + rpc_proc_unregister(net, "nfs"); nfs_fs_proc_net_exit(net); nfs_clients_exit(net); } @@ -2393,15 +2395,12 @@ static int __init init_nfs_fs(void) if (err) goto out1; - rpc_proc_register(&init_net, &nfs_rpcstat); - err = register_nfs_fs(); if (err) goto out0; return 0; out0: - rpc_proc_unregister(&init_net, "nfs"); nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2431,7 +2430,6 @@ static void __exit exit_nfs_fs(void) nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); unregister_pernet_subsys(&nfs_net_ops); - rpc_proc_unregister(&init_net, "nfs"); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); -- GitLab From 2b7f2d663a965943e8820b6a40cc2abe2eee7431 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 15 Feb 2024 14:57:32 -0500 Subject: [PATCH 2028/2290] nfs: make the rpc_stat per net namespace [ Upstream commit 1548036ef1204df65ca5a16e8b199c858cb80075 ] Now that we're exposing the rpc stats on a per-network namespace basis, move this struct into struct nfs_net and use that to make sure only the per-network namespace stats are exposed. Signed-off-by: Josef Bacik Signed-off-by: Trond Myklebust Stable-dep-of: 24457f1be29f ("nfs: Handle error of rpc_proc_register() in nfs_net_init().") Signed-off-by: Sasha Levin --- fs/nfs/client.c | 5 ++++- fs/nfs/inode.c | 4 +++- fs/nfs/internal.h | 2 -- fs/nfs/netns.h | 2 ++ 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f50e025ae4064..755256875052f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -73,7 +73,6 @@ const struct rpc_program nfs_program = { .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, - .stats = &nfs_rpcstat, .pipe_dir_name = NFS_PIPE_DIRNAME, }; @@ -496,6 +495,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init, rpc_authflavor_t flavor) { + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { .net = clp->cl_net, @@ -507,6 +507,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .servername = clp->cl_hostname, .nodename = cl_init->nodename, .program = &nfs_program, + .stats = &nn->rpcstats, .version = clp->rpc_ops->version, .authflavor = flavor, .cred = cl_init->cred, @@ -1142,6 +1143,8 @@ void nfs_clients_init(struct net *net) #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); + memset(&nn->rpcstats, 0, sizeof(nn->rpcstats)); + nn->rpcstats.program = &nfs_program; nfs_netns_sysfs_setup(nn, net); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3cdb8daa6d6a9..fae1e009cd6cf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2333,8 +2333,10 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_clients_init(net); - rpc_proc_register(net, &nfs_rpcstat); + rpc_proc_register(net, &nn->rpcstats); return nfs_fs_proc_net_init(net); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 35a8ae46b6c34..b3b801e7c4bc5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -443,8 +443,6 @@ int nfs_try_get_tree(struct fs_context *); int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); -extern struct rpc_stat nfs_rpcstat; - extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern bool nfs_sb_active(struct super_block *sb); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index c8374f74dce11..a68b21603ea9a 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -9,6 +9,7 @@ #include #include #include +#include struct bl_dev_msg { int32_t status; @@ -34,6 +35,7 @@ struct nfs_net { struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; + struct rpc_stat rpcstats; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_nfsfs; #endif -- GitLab From 8ae63bd858691bee0e2a92571f2fbb36a4d86d65 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 4 Apr 2024 15:12:00 -0700 Subject: [PATCH 2029/2290] nfs: Handle error of rpc_proc_register() in nfs_net_init(). [ Upstream commit 24457f1be29f1e7042e50a7749f5c2dde8c433c8 ] syzkaller reported a warning [0] triggered while destroying immature netns. rpc_proc_register() was called in init_nfs_fs(), but its error has been ignored since at least the initial commit 1da177e4c3f4 ("Linux-2.6.12-rc2"). Recently, commit d47151b79e32 ("nfs: expose /proc/net/sunrpc/nfs in net namespaces") converted the procfs to per-netns and made the problem more visible. Even when rpc_proc_register() fails, nfs_net_init() could succeed, and thus nfs_net_exit() will be called while destroying the netns. Then, remove_proc_entry() will be called for non-existing proc directory and trigger the warning below. Let's handle the error of rpc_proc_register() properly in nfs_net_init(). [0]: name 'nfs' WARNING: CPU: 1 PID: 1710 at fs/proc/generic.c:711 remove_proc_entry+0x1bb/0x2d0 fs/proc/generic.c:711 Modules linked in: CPU: 1 PID: 1710 Comm: syz-executor.2 Not tainted 6.8.0-12822-gcd51db110a7e #12 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:remove_proc_entry+0x1bb/0x2d0 fs/proc/generic.c:711 Code: 41 5d 41 5e c3 e8 85 09 b5 ff 48 c7 c7 88 58 64 86 e8 09 0e 71 02 e8 74 09 b5 ff 4c 89 e6 48 c7 c7 de 1b 80 84 e8 c5 ad 97 ff <0f> 0b eb b1 e8 5c 09 b5 ff 48 c7 c7 88 58 64 86 e8 e0 0d 71 02 eb RSP: 0018:ffffc9000c6d7ce0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8880422b8b00 RCX: ffffffff8110503c RDX: ffff888030652f00 RSI: ffffffff81105045 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: ffffffff81bb62cb R12: ffffffff84807ffc R13: ffff88804ad6fcc0 R14: ffffffff84807ffc R15: ffffffff85741ff8 FS: 00007f30cfba8640(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff51afe8000 CR3: 000000005a60a005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: rpc_proc_unregister+0x64/0x70 net/sunrpc/stats.c:310 nfs_net_exit+0x1c/0x30 fs/nfs/inode.c:2438 ops_exit_list+0x62/0xb0 net/core/net_namespace.c:170 setup_net+0x46c/0x660 net/core/net_namespace.c:372 copy_net_ns+0x244/0x590 net/core/net_namespace.c:505 create_new_namespaces+0x2ed/0x770 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xae/0x160 kernel/nsproxy.c:228 ksys_unshare+0x342/0x760 kernel/fork.c:3322 __do_sys_unshare kernel/fork.c:3393 [inline] __se_sys_unshare kernel/fork.c:3391 [inline] __x64_sys_unshare+0x1f/0x30 kernel/fork.c:3391 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0x7f30d0febe5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f30cfba7cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f30d0febe5d RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000006c020600 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 000000000000000b R14: 00007f30d104c530 R15: 0000000000000000 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fae1e009cd6cf..cf8c3771e4bfb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2336,7 +2336,12 @@ static int nfs_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); nfs_clients_init(net); - rpc_proc_register(net, &nn->rpcstats); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + nfs_clients_exit(net); + return -ENOMEM; + } + return nfs_fs_proc_net_init(net); } -- GitLab From 6322e368f0758f48ef057cbb14755b7650327b3c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Dec 2022 14:42:33 +0200 Subject: [PATCH 2030/2290] pinctrl: Introduce struct pinfunction and PINCTRL_PINFUNCTION() macro [ Upstream commit 443a0a0f0cf4f432c7af6654b7f2f920d411d379 ] There are many pin control drivers define their own data type for pin function representation which is the same or embed the same data as newly introduced one. Provide the data type and convenient macro for all pin control drivers. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Acked-by: Mika Westerberg Stable-dep-of: fed6d9a8e6a6 ("pinctrl: baytrail: Fix selecting gpio pinctrl state") Signed-off-by: Sasha Levin --- include/linux/pinctrl/pinctrl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 487117ccb1bc2..fb25085d09224 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -206,6 +206,26 @@ extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, const char *pin_group, const unsigned **pins, unsigned *num_pins); +/** + * struct pinfunction - Description about a function + * @name: Name of the function + * @groups: An array of groups for this function + * @ngroups: Number of groups in @groups + */ +struct pinfunction { + const char *name; + const char * const *groups; + size_t ngroups; +}; + +/* Convenience macro to define a single named pinfunction */ +#define PINCTRL_PINFUNCTION(_name, _groups, _ngroups) \ +(struct pinfunction) { \ + .name = (_name), \ + .groups = (_groups), \ + .ngroups = (_ngroups), \ + } + #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_PINCTRL) extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np); #else -- GitLab From 91a0840a3c0f5626fd7fca2859f4c0cc08fc5544 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Dec 2022 14:42:34 +0200 Subject: [PATCH 2031/2290] pinctrl: intel: Make use of struct pinfunction and PINCTRL_PINFUNCTION() [ Upstream commit 999b85bfd765f273208cd7348b2977d3c5ae0753 ] Since pin control provides a generic data type and a macro for the pin function definition, use them in the Intel driver. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Stable-dep-of: fed6d9a8e6a6 ("pinctrl: baytrail: Fix selecting gpio pinctrl state") Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-intel.c | 6 +++--- drivers/pinctrl/intel/pinctrl-intel.h | 13 ++++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 954a412267402..8542053d4d6d0 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -362,7 +362,7 @@ static const char *intel_get_function_name(struct pinctrl_dev *pctldev, { struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); - return pctrl->soc->functions[function].name; + return pctrl->soc->functions[function].func.name; } static int intel_get_function_groups(struct pinctrl_dev *pctldev, @@ -372,8 +372,8 @@ static int intel_get_function_groups(struct pinctrl_dev *pctldev, { struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); - *groups = pctrl->soc->functions[function].groups; - *ngroups = pctrl->soc->functions[function].ngroups; + *groups = pctrl->soc->functions[function].func.groups; + *ngroups = pctrl->soc->functions[function].func.ngroups; return 0; } diff --git a/drivers/pinctrl/intel/pinctrl-intel.h b/drivers/pinctrl/intel/pinctrl-intel.h index 65628423bf639..46f5f7d1565fe 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.h +++ b/drivers/pinctrl/intel/pinctrl-intel.h @@ -36,11 +36,13 @@ struct intel_pingroup { /** * struct intel_function - Description about a function + * @func: Generic data of the pin function (name and groups of pins) * @name: Name of the function * @groups: An array of groups for this function * @ngroups: Number of groups in @groups */ struct intel_function { + struct pinfunction func; const char *name; const char * const *groups; size_t ngroups; @@ -158,11 +160,12 @@ struct intel_community { .modes = __builtin_choose_expr(__builtin_constant_p((m)), NULL, (m)), \ } -#define FUNCTION(n, g) \ - { \ - .name = (n), \ - .groups = (g), \ - .ngroups = ARRAY_SIZE((g)), \ +#define FUNCTION(n, g) \ + { \ + .func = PINCTRL_PINFUNCTION((n), (g), ARRAY_SIZE(g)), \ + .name = (n), \ + .groups = (g), \ + .ngroups = ARRAY_SIZE((g)), \ } /** -- GitLab From 39460d43df7fa95e8e38edda38db123f0bc2c548 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Apr 2024 19:50:48 +0200 Subject: [PATCH 2032/2290] pinctrl: baytrail: Fix selecting gpio pinctrl state [ Upstream commit fed6d9a8e6a60ecf6506d0ea004040fbaa109927 ] For all the "score" pin-groups all the intel_pingroup-s to select the non GPIO function are re-used for byt_score_gpio_groups[]. But this is incorrect since a pin-group includes the mode setting, which for the non GPIO functions generally is 1, where as to select the GPIO function mode must be set to 0. So the GPIO function needs separate intel_pingroup-s with their own mode value of 0. Add a new PIN_GROUP_GPIO macro which adds a foo_gpio entry to each pin-group defined this way and update byt_score_gpio_groups[] to point to the new foo_gpio entries. The "sus" usb_oc_grp usb_ulpi_grp and pcu_spi_grp pin-groups are special because these have a non 0 mode value to select the GPIO functions and these already have matching foo_gpio pin-groups, leave these are unchanged. The pmu_clk "sus" groups added in commit 2f46d7f7e959 ("pinctrl: baytrail: Add pinconf group + function for the pmu_clk") do need to use the new PIN_GROUP_GPIO macro. Fixes: 2f46d7f7e959 ("pinctrl: baytrail: Add pinconf group + function for the pmu_clk") Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-baytrail.c | 74 ++++++++++++------------ drivers/pinctrl/intel/pinctrl-intel.h | 4 ++ 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 67db79f380510..a0b7b16cb4de3 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -276,33 +276,33 @@ static const unsigned int byt_score_plt_clk5_pins[] = { 101 }; static const unsigned int byt_score_smbus_pins[] = { 51, 52, 53 }; static const struct intel_pingroup byt_score_groups[] = { - PIN_GROUP("uart1_grp", byt_score_uart1_pins, 1), - PIN_GROUP("uart2_grp", byt_score_uart2_pins, 1), - PIN_GROUP("pwm0_grp", byt_score_pwm0_pins, 1), - PIN_GROUP("pwm1_grp", byt_score_pwm1_pins, 1), - PIN_GROUP("ssp2_grp", byt_score_ssp2_pins, 1), - PIN_GROUP("sio_spi_grp", byt_score_sio_spi_pins, 1), - PIN_GROUP("i2c5_grp", byt_score_i2c5_pins, 1), - PIN_GROUP("i2c6_grp", byt_score_i2c6_pins, 1), - PIN_GROUP("i2c4_grp", byt_score_i2c4_pins, 1), - PIN_GROUP("i2c3_grp", byt_score_i2c3_pins, 1), - PIN_GROUP("i2c2_grp", byt_score_i2c2_pins, 1), - PIN_GROUP("i2c1_grp", byt_score_i2c1_pins, 1), - PIN_GROUP("i2c0_grp", byt_score_i2c0_pins, 1), - PIN_GROUP("ssp0_grp", byt_score_ssp0_pins, 1), - PIN_GROUP("ssp1_grp", byt_score_ssp1_pins, 1), - PIN_GROUP("sdcard_grp", byt_score_sdcard_pins, byt_score_sdcard_mux_values), - PIN_GROUP("sdio_grp", byt_score_sdio_pins, 1), - PIN_GROUP("emmc_grp", byt_score_emmc_pins, 1), - PIN_GROUP("lpc_grp", byt_score_ilb_lpc_pins, 1), - PIN_GROUP("sata_grp", byt_score_sata_pins, 1), - PIN_GROUP("plt_clk0_grp", byt_score_plt_clk0_pins, 1), - PIN_GROUP("plt_clk1_grp", byt_score_plt_clk1_pins, 1), - PIN_GROUP("plt_clk2_grp", byt_score_plt_clk2_pins, 1), - PIN_GROUP("plt_clk3_grp", byt_score_plt_clk3_pins, 1), - PIN_GROUP("plt_clk4_grp", byt_score_plt_clk4_pins, 1), - PIN_GROUP("plt_clk5_grp", byt_score_plt_clk5_pins, 1), - PIN_GROUP("smbus_grp", byt_score_smbus_pins, 1), + PIN_GROUP_GPIO("uart1_grp", byt_score_uart1_pins, 1), + PIN_GROUP_GPIO("uart2_grp", byt_score_uart2_pins, 1), + PIN_GROUP_GPIO("pwm0_grp", byt_score_pwm0_pins, 1), + PIN_GROUP_GPIO("pwm1_grp", byt_score_pwm1_pins, 1), + PIN_GROUP_GPIO("ssp2_grp", byt_score_ssp2_pins, 1), + PIN_GROUP_GPIO("sio_spi_grp", byt_score_sio_spi_pins, 1), + PIN_GROUP_GPIO("i2c5_grp", byt_score_i2c5_pins, 1), + PIN_GROUP_GPIO("i2c6_grp", byt_score_i2c6_pins, 1), + PIN_GROUP_GPIO("i2c4_grp", byt_score_i2c4_pins, 1), + PIN_GROUP_GPIO("i2c3_grp", byt_score_i2c3_pins, 1), + PIN_GROUP_GPIO("i2c2_grp", byt_score_i2c2_pins, 1), + PIN_GROUP_GPIO("i2c1_grp", byt_score_i2c1_pins, 1), + PIN_GROUP_GPIO("i2c0_grp", byt_score_i2c0_pins, 1), + PIN_GROUP_GPIO("ssp0_grp", byt_score_ssp0_pins, 1), + PIN_GROUP_GPIO("ssp1_grp", byt_score_ssp1_pins, 1), + PIN_GROUP_GPIO("sdcard_grp", byt_score_sdcard_pins, byt_score_sdcard_mux_values), + PIN_GROUP_GPIO("sdio_grp", byt_score_sdio_pins, 1), + PIN_GROUP_GPIO("emmc_grp", byt_score_emmc_pins, 1), + PIN_GROUP_GPIO("lpc_grp", byt_score_ilb_lpc_pins, 1), + PIN_GROUP_GPIO("sata_grp", byt_score_sata_pins, 1), + PIN_GROUP_GPIO("plt_clk0_grp", byt_score_plt_clk0_pins, 1), + PIN_GROUP_GPIO("plt_clk1_grp", byt_score_plt_clk1_pins, 1), + PIN_GROUP_GPIO("plt_clk2_grp", byt_score_plt_clk2_pins, 1), + PIN_GROUP_GPIO("plt_clk3_grp", byt_score_plt_clk3_pins, 1), + PIN_GROUP_GPIO("plt_clk4_grp", byt_score_plt_clk4_pins, 1), + PIN_GROUP_GPIO("plt_clk5_grp", byt_score_plt_clk5_pins, 1), + PIN_GROUP_GPIO("smbus_grp", byt_score_smbus_pins, 1), }; static const char * const byt_score_uart_groups[] = { @@ -330,12 +330,14 @@ static const char * const byt_score_plt_clk_groups[] = { }; static const char * const byt_score_smbus_groups[] = { "smbus_grp" }; static const char * const byt_score_gpio_groups[] = { - "uart1_grp", "uart2_grp", "pwm0_grp", "pwm1_grp", "ssp0_grp", - "ssp1_grp", "ssp2_grp", "sio_spi_grp", "i2c0_grp", "i2c1_grp", - "i2c2_grp", "i2c3_grp", "i2c4_grp", "i2c5_grp", "i2c6_grp", - "sdcard_grp", "sdio_grp", "emmc_grp", "lpc_grp", "sata_grp", - "plt_clk0_grp", "plt_clk1_grp", "plt_clk2_grp", "plt_clk3_grp", - "plt_clk4_grp", "plt_clk5_grp", "smbus_grp", + "uart1_grp_gpio", "uart2_grp_gpio", "pwm0_grp_gpio", + "pwm1_grp_gpio", "ssp0_grp_gpio", "ssp1_grp_gpio", "ssp2_grp_gpio", + "sio_spi_grp_gpio", "i2c0_grp_gpio", "i2c1_grp_gpio", "i2c2_grp_gpio", + "i2c3_grp_gpio", "i2c4_grp_gpio", "i2c5_grp_gpio", "i2c6_grp_gpio", + "sdcard_grp_gpio", "sdio_grp_gpio", "emmc_grp_gpio", "lpc_grp_gpio", + "sata_grp_gpio", "plt_clk0_grp_gpio", "plt_clk1_grp_gpio", + "plt_clk2_grp_gpio", "plt_clk3_grp_gpio", "plt_clk4_grp_gpio", + "plt_clk5_grp_gpio", "smbus_grp_gpio", }; static const struct intel_function byt_score_functions[] = { @@ -454,8 +456,8 @@ static const struct intel_pingroup byt_sus_groups[] = { PIN_GROUP("usb_oc_grp_gpio", byt_sus_usb_over_current_pins, byt_sus_usb_over_current_gpio_mode_values), PIN_GROUP("usb_ulpi_grp_gpio", byt_sus_usb_ulpi_pins, byt_sus_usb_ulpi_gpio_mode_values), PIN_GROUP("pcu_spi_grp_gpio", byt_sus_pcu_spi_pins, byt_sus_pcu_spi_gpio_mode_values), - PIN_GROUP("pmu_clk1_grp", byt_sus_pmu_clk1_pins, 1), - PIN_GROUP("pmu_clk2_grp", byt_sus_pmu_clk2_pins, 1), + PIN_GROUP_GPIO("pmu_clk1_grp", byt_sus_pmu_clk1_pins, 1), + PIN_GROUP_GPIO("pmu_clk2_grp", byt_sus_pmu_clk2_pins, 1), }; static const char * const byt_sus_usb_groups[] = { @@ -467,7 +469,7 @@ static const char * const byt_sus_pmu_clk_groups[] = { }; static const char * const byt_sus_gpio_groups[] = { "usb_oc_grp_gpio", "usb_ulpi_grp_gpio", "pcu_spi_grp_gpio", - "pmu_clk1_grp", "pmu_clk2_grp", + "pmu_clk1_grp_gpio", "pmu_clk2_grp_gpio", }; static const struct intel_function byt_sus_functions[] = { diff --git a/drivers/pinctrl/intel/pinctrl-intel.h b/drivers/pinctrl/intel/pinctrl-intel.h index 46f5f7d1565fe..0d45063435ebc 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.h +++ b/drivers/pinctrl/intel/pinctrl-intel.h @@ -160,6 +160,10 @@ struct intel_community { .modes = __builtin_choose_expr(__builtin_constant_p((m)), NULL, (m)), \ } +#define PIN_GROUP_GPIO(n, p, m) \ + PIN_GROUP(n, p, m), \ + PIN_GROUP(n "_gpio", p, 0) + #define FUNCTION(n, g) \ { \ .func = PINCTRL_PINFUNCTION((n), (g), ARRAY_SIZE(g)), \ -- GitLab From 030017a83d64e4c2291f865619f9a6f9dadca1d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:27 +0200 Subject: [PATCH 2033/2290] power: rt9455: hide unused rt9455_boost_voltage_values [ Upstream commit 452d8950db3e839aba1bb13bc5378f4bac11fa04 ] The rt9455_boost_voltage_values[] array is only used when USB PHY support is enabled, causing a W=1 warning otherwise: drivers/power/supply/rt9455_charger.c:200:18: error: 'rt9455_boost_voltage_values' defined but not used [-Werror=unused-const-variable=] Enclose the definition in the same #ifdef as the references to it. Fixes: e86d69dd786e ("power_supply: Add support for Richtek RT9455 battery charger") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240403080702.3509288-10-arnd@kernel.org Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/rt9455_charger.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c index 72962286d7045..c5597967a0699 100644 --- a/drivers/power/supply/rt9455_charger.c +++ b/drivers/power/supply/rt9455_charger.c @@ -193,6 +193,7 @@ static const int rt9455_voreg_values[] = { 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000 }; +#if IS_ENABLED(CONFIG_USB_PHY) /* * When the charger is in boost mode, REG02[7:2] represent boost output * voltage. @@ -208,6 +209,7 @@ static const int rt9455_boost_voltage_values[] = { 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, }; +#endif /* REG07[3:0] (VMREG) in uV */ static const int rt9455_vmreg_values[] = { -- GitLab From 22975a1eb6af9edf715ec2b8f439786f36566d4a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 10 Apr 2024 10:44:05 +0200 Subject: [PATCH 2034/2290] power: supply: mt6360_charger: Fix of_match for usb-otg-vbus regulator [ Upstream commit 1e0fb113646182e073539db96016b00cfeb18ecc ] The of_match shall correspond to the name of the regulator subnode, or the deprecated `regulator-compatible` property must be used: failing to do so, the regulator won't probe (and the driver will as well not probe). Since the devicetree binding for this driver is actually correct and wants DTs to use the "usb-otg-vbus-regulator" subnode name, fix this driver by aligning the `of_match` string to what the DT binding wants. Fixes: 0402e8ebb8b8 ("power: supply: mt6360_charger: add MT6360 charger support") Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240410084405.1389378-1-angelogioacchino.delregno@collabora.com Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/mt6360_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c index f1248faf59058..383bf19819dfb 100644 --- a/drivers/power/supply/mt6360_charger.c +++ b/drivers/power/supply/mt6360_charger.c @@ -591,7 +591,7 @@ static const struct regulator_ops mt6360_chg_otg_ops = { }; static const struct regulator_desc mt6360_otg_rdesc = { - .of_match = "usb-otg-vbus", + .of_match = "usb-otg-vbus-regulator", .name = "usb-otg-vbus", .ops = &mt6360_chg_otg_ops, .owner = THIS_MODULE, -- GitLab From 518d5ddafeb084d6d9b1773ed85164300037d0e6 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Mon, 15 Apr 2024 18:53:28 +0800 Subject: [PATCH 2035/2290] pinctrl: devicetree: fix refcount leak in pinctrl_dt_to_map() [ Upstream commit a0cedbcc8852d6c77b00634b81e41f17f29d9404 ] If we fail to allocate propname buffer, we need to drop the reference count we just took. Because the pinctrl_dt_free_maps() includes the droping operation, here we call it directly. Fixes: 91d5c5060ee2 ("pinctrl: devicetree: fix null pointer dereferencing in pinctrl_dt_to_map") Suggested-by: Dan Carpenter Signed-off-by: Zeng Heng Reviewed-by: Dan Carpenter Message-ID: <20240415105328.3651441-1-zengheng4@huawei.com> Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/devicetree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index 6e0a40962f384..5ee746cb81f59 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -220,14 +220,16 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev) for (state = 0; ; state++) { /* Retrieve the pinctrl-* property */ propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state); - if (!propname) - return -ENOMEM; + if (!propname) { + ret = -ENOMEM; + goto err; + } prop = of_find_property(np, propname, &size); kfree(propname); if (!prop) { if (state == 0) { - of_node_put(np); - return -ENODEV; + ret = -ENODEV; + goto err; } break; } -- GitLab From ed6877bce6a699e0c6ac3a2ae834d640fbf00fd3 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 9 Apr 2024 16:44:38 +0200 Subject: [PATCH 2036/2290] regulator: mt6360: De-capitalize devicetree regulator subnodes [ Upstream commit d3cf8a17498dd9104c04ad28eeac3ef3339f9f9f ] The MT6360 regulator binding, the example in the MT6360 mfd binding, and the devicetree users of those bindings are rightfully declaring MT6360 regulator subnodes with non-capital names, and luckily without using the deprecated regulator-compatible property. With this driver declaring capitalized BUCKx/LDOx as of_match string for the node names, obviously no regulator gets probed: fix that by changing the MT6360_REGULATOR_DESC macro to add a "match" parameter which gets assigned to the of_match. Fixes: d321571d5e4c ("regulator: mt6360: Add support for MT6360 regulator") Signed-off-by: AngeloGioacchino Del Regno Link: https://msgid.link/r/20240409144438.410060-1-angelogioacchino.delregno@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/mt6360-regulator.c | 32 +++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/mt6360-regulator.c b/drivers/regulator/mt6360-regulator.c index 4d34be94d1663..fc464a4450dc5 100644 --- a/drivers/regulator/mt6360-regulator.c +++ b/drivers/regulator/mt6360-regulator.c @@ -319,15 +319,15 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode) } } -#define MT6360_REGULATOR_DESC(_name, _sname, ereg, emask, vreg, vmask, \ - mreg, mmask, streg, stmask, vranges, \ - vcnts, offon_delay, irq_tbls) \ +#define MT6360_REGULATOR_DESC(match, _name, _sname, ereg, emask, vreg, \ + vmask, mreg, mmask, streg, stmask, \ + vranges, vcnts, offon_delay, irq_tbls) \ { \ .desc = { \ .name = #_name, \ .supply_name = #_sname, \ .id = MT6360_REGULATOR_##_name, \ - .of_match = of_match_ptr(#_name), \ + .of_match = of_match_ptr(match), \ .regulators_node = of_match_ptr("regulator"), \ .of_map_mode = mt6360_regulator_of_map_mode, \ .owner = THIS_MODULE, \ @@ -351,21 +351,29 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode) } static const struct mt6360_regulator_desc mt6360_regulator_descs[] = { - MT6360_REGULATOR_DESC(BUCK1, BUCK1_VIN, 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04, + MT6360_REGULATOR_DESC("buck1", BUCK1, BUCK1_VIN, + 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04, buck_vout_ranges, 256, 0, buck1_irq_tbls), - MT6360_REGULATOR_DESC(BUCK2, BUCK2_VIN, 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04, + MT6360_REGULATOR_DESC("buck2", BUCK2, BUCK2_VIN, + 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04, buck_vout_ranges, 256, 0, buck2_irq_tbls), - MT6360_REGULATOR_DESC(LDO6, LDO_VIN3, 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04, + MT6360_REGULATOR_DESC("ldo6", LDO6, LDO_VIN3, + 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04, ldo_vout_ranges1, 256, 0, ldo6_irq_tbls), - MT6360_REGULATOR_DESC(LDO7, LDO_VIN3, 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04, + MT6360_REGULATOR_DESC("ldo7", LDO7, LDO_VIN3, + 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04, ldo_vout_ranges1, 256, 0, ldo7_irq_tbls), - MT6360_REGULATOR_DESC(LDO1, LDO_VIN1, 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04, + MT6360_REGULATOR_DESC("ldo1", LDO1, LDO_VIN1, + 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04, ldo_vout_ranges2, 256, 0, ldo1_irq_tbls), - MT6360_REGULATOR_DESC(LDO2, LDO_VIN1, 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04, + MT6360_REGULATOR_DESC("ldo2", LDO2, LDO_VIN1, + 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04, ldo_vout_ranges2, 256, 0, ldo2_irq_tbls), - MT6360_REGULATOR_DESC(LDO3, LDO_VIN1, 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04, + MT6360_REGULATOR_DESC("ldo3", LDO3, LDO_VIN1, + 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04, ldo_vout_ranges2, 256, 100, ldo3_irq_tbls), - MT6360_REGULATOR_DESC(LDO5, LDO_VIN2, 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04, + MT6360_REGULATOR_DESC("ldo5", LDO5, LDO_VIN2, + 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04, ldo_vout_ranges3, 128, 100, ldo5_irq_tbls), }; -- GitLab From 2ec0e92dc5dc474b83eb2a8e1f6d925150e13478 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 22 Apr 2024 09:38:33 +0300 Subject: [PATCH 2037/2290] regulator: change stubbed devm_regulator_get_enable to return Ok [ Upstream commit 96e20adc43c4f81e9163a5188cee75a6dd393e09 ] The devm_regulator_get_enable() should be a 'call and forget' API, meaning, when it is used to enable the regulators, the API does not provide a handle to do any further control of the regulators. It gives no real benefit to return an error from the stub if CONFIG_REGULATOR is not set. On the contrary, returning and error is causing problems to drivers when hardware is such it works out just fine with no regulator control. Returning an error forces drivers to specifically handle the case where CONFIG_REGULATOR is not set, making the mere existence of the stub questionalble. Furthermore, the stub of the regulator_enable() seems to be returning Ok. Change the stub implementation for the devm_regulator_get_enable() to return Ok so drivers do not separately handle the case where the CONFIG_REGULATOR is not set. Signed-off-by: Matti Vaittinen Reported-by: Aleksander Mazur Suggested-by: Guenter Roeck Fixes: da279e6965b3 ("regulator: Add devm helpers for get and enable") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/ZiYF6d1V1vSPcsJS@drtxq0yyyyyyyyyyyyyby-3.rev.dnainternet.fi Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index ee3b4a0146119..eea165685588a 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -361,7 +361,7 @@ devm_regulator_get_exclusive(struct device *dev, const char *id) static inline int devm_regulator_get_enable(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline int devm_regulator_get_enable_optional(struct device *dev, -- GitLab From a4a645d96b7496425b466297f2691756cfe0052b Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 23 Apr 2024 14:38:28 +0300 Subject: [PATCH 2038/2290] regulator: change devm_regulator_get_enable_optional() stub to return Ok [ Upstream commit ff33132605c1a0acea59e4c523cb7c6fabe856b2 ] The devm_regulator_get_enable_optional() should be a 'call and forget' API, meaning, when it is used to enable the regulators, the API does not provide a handle to do any further control of the regulators. It gives no real benefit to return an error from the stub if CONFIG_REGULATOR is not set. On the contrary, returning an error is causing problems to drivers when hardware is such it works out just fine with no regulator control. Returning an error forces drivers to specifically handle the case where CONFIG_REGULATOR is not set, making the mere existence of the stub questionalble. Change the stub implementation for the devm_regulator_get_enable_optional() to return Ok so drivers do not separately handle the case where the CONFIG_REGULATOR is not set. Signed-off-by: Matti Vaittinen Fixes: da279e6965b3 ("regulator: Add devm helpers for get and enable") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/ZiedtOE00Zozd3XO@fedora Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index eea165685588a..a9ca87a8f4e61 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -367,7 +367,7 @@ static inline int devm_regulator_get_enable(struct device *dev, const char *id) static inline int devm_regulator_get_enable_optional(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline struct regulator *__must_check -- GitLab From a81bcc6abf49da7bf534538e926bb7ece18abf6c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 Apr 2024 15:03:44 -0700 Subject: [PATCH 2039/2290] bpf, kconfig: Fix DEBUG_INFO_BTF_MODULES Kconfig definition [ Upstream commit 229087f6f1dc2d0c38feba805770f28529980ec0 ] Turns out that due to CONFIG_DEBUG_INFO_BTF_MODULES not having an explicitly specified "menu item name" in Kconfig, it's basically impossible to turn it off (see [0]). This patch fixes the issue by defining menu name for CONFIG_DEBUG_INFO_BTF_MODULES, which makes it actually adjustable and independent of CONFIG_DEBUG_INFO_BTF, in the sense that one can have DEBUG_INFO_BTF=y and DEBUG_INFO_BTF_MODULES=n. We still keep it as defaulting to Y, of course. Fixes: 5f9ae91f7c0d ("kbuild: Build kernel module BTFs if BTF is enabled and pahole supports it") Reported-by: Vincent Li Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/CAK3+h2xiFfzQ9UXf56nrRRP=p1+iUxGoEP5B+aq9MDT5jLXDSg@mail.gmail.com [0] Link: https://lore.kernel.org/bpf/20240404220344.3879270-1-andrii@kernel.org Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 95541b99aa8ea..b2dff19358938 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -343,7 +343,7 @@ config DEBUG_INFO_SPLIT Incompatible with older versions of ccache. config DEBUG_INFO_BTF - bool "Generate BTF typeinfo" + bool "Generate BTF type information" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST depends on BPF_SYSCALL @@ -374,7 +374,8 @@ config PAHOLE_HAS_LANG_EXCLUDE using DEBUG_INFO_BTF_MODULES. config DEBUG_INFO_BTF_MODULES - def_bool y + bool "Generate BTF type information for kernel modules" + default y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF help Generate compact split BTF type information for kernel modules. -- GitLab From 39dc9e1442385d6e9be0b6491ee488dddd55ae27 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 4 Apr 2024 10:10:01 +0800 Subject: [PATCH 2040/2290] bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue [ Upstream commit 6648e613226e18897231ab5e42ffc29e63fa3365 ] Fix NULL pointer data-races in sk_psock_skb_ingress_enqueue() which syzbot reported [1]. [1] BUG: KCSAN: data-race in sk_psock_drop / sk_psock_skb_ingress_enqueue write to 0xffff88814b3278b8 of 8 bytes by task 10724 on cpu 1: sk_psock_stop_verdict net/core/skmsg.c:1257 [inline] sk_psock_drop+0x13e/0x1f0 net/core/skmsg.c:843 sk_psock_put include/linux/skmsg.h:459 [inline] sock_map_close+0x1a7/0x260 net/core/sock_map.c:1648 unix_release+0x4b/0x80 net/unix/af_unix.c:1048 __sock_release net/socket.c:659 [inline] sock_close+0x68/0x150 net/socket.c:1421 __fput+0x2c1/0x660 fs/file_table.c:422 __fput_sync+0x44/0x60 fs/file_table.c:507 __do_sys_close fs/open.c:1556 [inline] __se_sys_close+0x101/0x1b0 fs/open.c:1541 __x64_sys_close+0x1f/0x30 fs/open.c:1541 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff88814b3278b8 of 8 bytes by task 10713 on cpu 0: sk_psock_data_ready include/linux/skmsg.h:464 [inline] sk_psock_skb_ingress_enqueue+0x32d/0x390 net/core/skmsg.c:555 sk_psock_skb_ingress_self+0x185/0x1e0 net/core/skmsg.c:606 sk_psock_verdict_apply net/core/skmsg.c:1008 [inline] sk_psock_verdict_recv+0x3e4/0x4a0 net/core/skmsg.c:1202 unix_read_skb net/unix/af_unix.c:2546 [inline] unix_stream_read_skb+0x9e/0xf0 net/unix/af_unix.c:2682 sk_psock_verdict_data_ready+0x77/0x220 net/core/skmsg.c:1223 unix_stream_sendmsg+0x527/0x860 net/unix/af_unix.c:2339 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x140/0x180 net/socket.c:745 ____sys_sendmsg+0x312/0x410 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x1e9/0x280 net/socket.c:2667 __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x46/0x50 net/socket.c:2674 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 value changed: 0xffffffff83d7feb0 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10713 Comm: syz-executor.4 Tainted: G W 6.8.0-syzkaller-08951-gfe46a7dd189e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Prior to this, commit 4cd12c6065df ("bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready()") fixed one NULL pointer similarly due to no protection of saved_data_ready. Here is another different caller causing the same issue because of the same reason. So we should protect it with sk_callback_lock read lock because the writer side in the sk_psock_drop() uses "write_lock_bh(&sk->sk_callback_lock);". To avoid errors that could happen in future, I move those two pairs of lock into the sk_psock_data_ready(), which is suggested by John Fastabend. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+aa8c8ec2538929f18f2d@syzkaller.appspotmail.com Signed-off-by: Jason Xing Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=aa8c8ec2538929f18f2d Link: https://lore.kernel.org/all/20240329134037.92124-1-kerneljasonxing@gmail.com Link: https://lore.kernel.org/bpf/20240404021001.94815-1-kerneljasonxing@gmail.com Signed-off-by: Sasha Levin --- include/linux/skmsg.h | 2 ++ net/core/skmsg.c | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index bd4418377bacf..062fe440f5d09 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -456,10 +456,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 39643f78cf782..8b0459a6b629f 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1217,11 +1217,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) { - read_lock_bh(&sk->sk_callback_lock); + if (psock) sk_psock_data_ready(sk, psock); - read_unlock_bh(&sk->sk_callback_lock); - } rcu_read_unlock(); } } -- GitLab From 264327b7163f1f0d547532266275b7e12cf317d1 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 10 Apr 2024 08:57:14 +0800 Subject: [PATCH 2041/2290] nvme: fix warn output about shared namespaces without CONFIG_NVME_MULTIPATH [ Upstream commit 0bc2e80b9be51712b19e919db5abc97a418f8292 ] Move the stray '.' that is currently at the end of the line after newline '\n' to before newline character which is the right position. Fixes: ce8d78616a6b ("nvme: warn about shared namespaces without CONFIG_NVME_MULTIPATH") Signed-off-by: Yi Zhang Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 20160683e8685..75b4dd8a55b03 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4284,7 +4284,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) "Found shared namespace %d, but multipathing not supported.\n", info->nsid); dev_warn_once(ctrl->device, - "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n."); + "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n"); } } -- GitLab From 77fe00227f2e2613299cafb71ce68d84b50712d6 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 12 Apr 2024 16:11:00 +0200 Subject: [PATCH 2042/2290] bpf: Fix a verifier verbose message [ Upstream commit 37eacb9f6e89fb399a79e952bc9c78eb3e16290e ] Long ago a map file descriptor in a pseudo ldimm64 instruction could only be present as an immediate value insn[0].imm, and thus this value was used in a verbose verifier message printed when the file descriptor wasn't valid. Since addition of BPF_PSEUDO_MAP_IDX_VALUE/BPF_PSEUDO_MAP_IDX the insn[0].imm field can also contain an index pointing to the file descriptor in the attr.fd_array array. However, if the file descriptor is invalid, the verifier still prints the verbose message containing value of insn[0].imm. Patch the verifier message to always print the actual file descriptor value. Fixes: 387544bfa291 ("bpf: Introduce fd_idx") Signed-off-by: Anton Protopopov Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240412141100.3562942-1-aspsk@isovalent.com Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 27cc6e3db5a86..18b3f429abe17 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13177,8 +13177,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", - insn[0].imm); + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); return PTR_ERR(map); } -- GitLab From 1a8183ccf04428b7624734722a31fab55734089f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 11 Oct 2022 17:22:04 +0800 Subject: [PATCH 2043/2290] spi: introduce new helpers with using modern naming [ Upstream commit b8d3b056a78dcc941fd1a117697ab2b956c2953f ] For using modern names host/target to instead of all the legacy names, I think it takes 3 steps: - step1: introduce new helpers with modern naming. - step2: switch to use these new helpers in all drivers. - step3: remove all legacy helpers and update all legacy names. This patch is for step1, it introduces new helpers with host/target naming for drivers using. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221011092204.950288-1-yangyingliang@huawei.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi.c | 11 ++++++++++ include/linux/spi/spi.h | 47 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 19688f333e0bc..4aa2e0928de9c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2774,6 +2774,17 @@ int spi_slave_abort(struct spi_device *spi) } EXPORT_SYMBOL_GPL(spi_slave_abort); +int spi_target_abort(struct spi_device *spi) +{ + struct spi_controller *ctlr = spi->controller; + + if (spi_controller_is_target(ctlr) && ctlr->target_abort) + return ctlr->target_abort(ctlr); + + return -ENOTSUPP; +} +EXPORT_SYMBOL_GPL(spi_target_abort); + static ssize_t slave_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 8e9054d9f6df0..6edf8a2962d4a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -376,6 +376,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller + * @target: indicates that this is an SPI target controller * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. @@ -460,6 +461,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller + * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). @@ -556,8 +558,12 @@ struct spi_controller { /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; - /* Flag indicating this is an SPI slave controller */ - bool slave; + union { + /* Flag indicating this is an SPI slave controller */ + bool slave; + /* Flag indicating this is an SPI target controller */ + bool target; + }; /* * on some hardware transfer / message size may be constrained @@ -671,6 +677,7 @@ struct spi_controller { int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); int (*slave_abort)(struct spi_controller *ctlr); + int (*target_abort)(struct spi_controller *ctlr); /* * These hooks are for drivers that use a generic implementation @@ -748,6 +755,11 @@ static inline bool spi_controller_is_slave(struct spi_controller *ctlr) return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; } +static inline bool spi_controller_is_target(struct spi_controller *ctlr) +{ + return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; +} + /* PM calls that need to be issued by the driver */ extern int spi_controller_suspend(struct spi_controller *ctlr); extern int spi_controller_resume(struct spi_controller *ctlr); @@ -784,6 +796,21 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +static inline struct spi_controller *spi_alloc_host(struct device *dev, + unsigned int size) +{ + return __spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *spi_alloc_target(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __spi_alloc_controller(dev, size, true); +} + struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); @@ -803,6 +830,21 @@ static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, return __devm_spi_alloc_controller(dev, size, true); } +static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, + unsigned int size) +{ + return __devm_spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *devm_spi_alloc_target(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __devm_spi_alloc_controller(dev, size, true); +} + extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); @@ -1162,6 +1204,7 @@ static inline void spi_message_free(struct spi_message *m) extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); +extern int spi_target_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) -- GitLab From 4a680d305e58712b96979772e401b26c02a00a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 3 Mar 2023 18:19:22 +0100 Subject: [PATCH 2044/2290] spi: axi-spi-engine: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fc87abbef23413943457459e2c473ce607b4dd24 ] The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230303172041.2103336-9-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 80c3e38f5c1b5..c5a3a31891642 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -554,7 +554,7 @@ err_put_master: return ret; } -static int spi_engine_remove(struct platform_device *pdev) +static void spi_engine_remove(struct platform_device *pdev) { struct spi_master *master = spi_master_get(platform_get_drvdata(pdev)); struct spi_engine *spi_engine = spi_master_get_devdata(master); @@ -572,8 +572,6 @@ static int spi_engine_remove(struct platform_device *pdev) clk_disable_unprepare(spi_engine->ref_clk); clk_disable_unprepare(spi_engine->clk); - - return 0; } static const struct of_device_id spi_engine_match_table[] = { @@ -584,7 +582,7 @@ MODULE_DEVICE_TABLE(of, spi_engine_match_table); static struct platform_driver spi_engine_driver = { .probe = spi_engine_probe, - .remove = spi_engine_remove, + .remove_new = spi_engine_remove, .driver = { .name = "spi-engine", .of_match_table = spi_engine_match_table, -- GitLab From d8309051f71b84fa8771a4adfa155c560cf52876 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 28 Jul 2023 17:32:03 +0800 Subject: [PATCH 2045/2290] spi: spi-axi-spi-engine: switch to use modern name [ Upstream commit 9d5920b37ab4a970f658a6a30b54cc6d6a7d2d3d ] Change legacy name master to modern name host or controller. No functional changed. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20230728093221.3312026-4-yangyingliang@huawei.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 58 ++++++++++++++++---------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index c5a3a31891642..e10c70cb87c97 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -356,8 +356,8 @@ static bool spi_engine_read_rx_fifo(struct spi_engine *spi_engine) static irqreturn_t spi_engine_irq(int irq, void *devid) { - struct spi_master *master = devid; - struct spi_engine *spi_engine = spi_master_get_devdata(master); + struct spi_controller *host = devid; + struct spi_engine *spi_engine = spi_controller_get_devdata(host); unsigned int disable_int = 0; unsigned int pending; @@ -396,7 +396,7 @@ static irqreturn_t spi_engine_irq(int irq, void *devid) msg->status = 0; msg->actual_length = msg->frame_length; spi_engine->msg = NULL; - spi_finalize_current_message(master); + spi_finalize_current_message(host); disable_int |= SPI_ENGINE_INT_SYNC; } } @@ -412,11 +412,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid) return IRQ_HANDLED; } -static int spi_engine_transfer_one_message(struct spi_master *master, +static int spi_engine_transfer_one_message(struct spi_controller *host, struct spi_message *msg) { struct spi_engine_program p_dry, *p; - struct spi_engine *spi_engine = spi_master_get_devdata(master); + struct spi_engine *spi_engine = spi_controller_get_devdata(host); unsigned int int_enable = 0; unsigned long flags; size_t size; @@ -464,7 +464,7 @@ static int spi_engine_transfer_one_message(struct spi_master *master, static int spi_engine_probe(struct platform_device *pdev) { struct spi_engine *spi_engine; - struct spi_master *master; + struct spi_controller *host; unsigned int version; int irq; int ret; @@ -477,29 +477,29 @@ static int spi_engine_probe(struct platform_device *pdev) if (!spi_engine) return -ENOMEM; - master = spi_alloc_master(&pdev->dev, 0); - if (!master) + host = spi_alloc_host(&pdev->dev, 0); + if (!host) return -ENOMEM; - spi_master_set_devdata(master, spi_engine); + spi_controller_set_devdata(host, spi_engine); spin_lock_init(&spi_engine->lock); spi_engine->clk = devm_clk_get(&pdev->dev, "s_axi_aclk"); if (IS_ERR(spi_engine->clk)) { ret = PTR_ERR(spi_engine->clk); - goto err_put_master; + goto err_put_host; } spi_engine->ref_clk = devm_clk_get(&pdev->dev, "spi_clk"); if (IS_ERR(spi_engine->ref_clk)) { ret = PTR_ERR(spi_engine->ref_clk); - goto err_put_master; + goto err_put_host; } ret = clk_prepare_enable(spi_engine->clk); if (ret) - goto err_put_master; + goto err_put_host; ret = clk_prepare_enable(spi_engine->ref_clk); if (ret) @@ -525,46 +525,46 @@ static int spi_engine_probe(struct platform_device *pdev) writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING); writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE); - ret = request_irq(irq, spi_engine_irq, 0, pdev->name, master); + ret = request_irq(irq, spi_engine_irq, 0, pdev->name, host); if (ret) goto err_ref_clk_disable; - master->dev.of_node = pdev->dev.of_node; - master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE; - master->bits_per_word_mask = SPI_BPW_MASK(8); - master->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2; - master->transfer_one_message = spi_engine_transfer_one_message; - master->num_chipselect = 8; + host->dev.of_node = pdev->dev.of_node; + host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE; + host->bits_per_word_mask = SPI_BPW_MASK(8); + host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2; + host->transfer_one_message = spi_engine_transfer_one_message; + host->num_chipselect = 8; - ret = spi_register_master(master); + ret = spi_register_controller(host); if (ret) goto err_free_irq; - platform_set_drvdata(pdev, master); + platform_set_drvdata(pdev, host); return 0; err_free_irq: - free_irq(irq, master); + free_irq(irq, host); err_ref_clk_disable: clk_disable_unprepare(spi_engine->ref_clk); err_clk_disable: clk_disable_unprepare(spi_engine->clk); -err_put_master: - spi_master_put(master); +err_put_host: + spi_controller_put(host); return ret; } static void spi_engine_remove(struct platform_device *pdev) { - struct spi_master *master = spi_master_get(platform_get_drvdata(pdev)); - struct spi_engine *spi_engine = spi_master_get_devdata(master); + struct spi_controller *host = spi_controller_get(platform_get_drvdata(pdev)); + struct spi_engine *spi_engine = spi_controller_get_devdata(host); int irq = platform_get_irq(pdev, 0); - spi_unregister_master(master); + spi_unregister_controller(host); - free_irq(irq, master); + free_irq(irq, host); - spi_master_put(master); + spi_controller_put(host); writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING); writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE); -- GitLab From aec8b34ec09044d9b225240fc4f9ef41fea14e93 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Wed, 23 Aug 2023 21:39:18 +0800 Subject: [PATCH 2046/2290] spi: spi-axi-spi-engine: Use helper function devm_clk_get_enabled() [ Upstream commit a08199b309f833fd4221ab5ee2391c791fe26385 ] Since commit 7ef9651e9792 ("clk: Provide new devm_clk helpers for prepared and enabled clocks"), devm_clk_get() and clk_prepare_enable() can now be replaced by devm_clk_get_enabled() when driver enables (and possibly prepares) the clocks for the whole lifetime of the device. Moreover, it is no longer necessary to unprepare and disable the clocks explicitly. Reviewed-by: Jonathan Cameron Signed-off-by: Li Zetao Link: https://lore.kernel.org/r/20230823133938.1359106-6-lizetao1@huawei.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index e10c70cb87c97..861578aa6ea12 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -485,30 +485,22 @@ static int spi_engine_probe(struct platform_device *pdev) spin_lock_init(&spi_engine->lock); - spi_engine->clk = devm_clk_get(&pdev->dev, "s_axi_aclk"); + spi_engine->clk = devm_clk_get_enabled(&pdev->dev, "s_axi_aclk"); if (IS_ERR(spi_engine->clk)) { ret = PTR_ERR(spi_engine->clk); goto err_put_host; } - spi_engine->ref_clk = devm_clk_get(&pdev->dev, "spi_clk"); + spi_engine->ref_clk = devm_clk_get_enabled(&pdev->dev, "spi_clk"); if (IS_ERR(spi_engine->ref_clk)) { ret = PTR_ERR(spi_engine->ref_clk); goto err_put_host; } - ret = clk_prepare_enable(spi_engine->clk); - if (ret) - goto err_put_host; - - ret = clk_prepare_enable(spi_engine->ref_clk); - if (ret) - goto err_clk_disable; - spi_engine->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(spi_engine->base)) { ret = PTR_ERR(spi_engine->base); - goto err_ref_clk_disable; + goto err_put_host; } version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION); @@ -518,7 +510,7 @@ static int spi_engine_probe(struct platform_device *pdev) SPI_ENGINE_VERSION_MINOR(version), SPI_ENGINE_VERSION_PATCH(version)); ret = -ENODEV; - goto err_ref_clk_disable; + goto err_put_host; } writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_RESET); @@ -527,7 +519,7 @@ static int spi_engine_probe(struct platform_device *pdev) ret = request_irq(irq, spi_engine_irq, 0, pdev->name, host); if (ret) - goto err_ref_clk_disable; + goto err_put_host; host->dev.of_node = pdev->dev.of_node; host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE; @@ -545,10 +537,6 @@ static int spi_engine_probe(struct platform_device *pdev) return 0; err_free_irq: free_irq(irq, host); -err_ref_clk_disable: - clk_disable_unprepare(spi_engine->ref_clk); -err_clk_disable: - clk_disable_unprepare(spi_engine->clk); err_put_host: spi_controller_put(host); return ret; @@ -569,9 +557,6 @@ static void spi_engine_remove(struct platform_device *pdev) writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING); writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE); writel_relaxed(0x01, spi_engine->base + SPI_ENGINE_REG_RESET); - - clk_disable_unprepare(spi_engine->ref_clk); - clk_disable_unprepare(spi_engine->clk); } static const struct of_device_id spi_engine_match_table[] = { -- GitLab From be632e909c057d2f6c3c6ec9ca7b968b2dff7bed Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 17 Nov 2023 14:12:54 -0600 Subject: [PATCH 2047/2290] spi: axi-spi-engine: simplify driver data allocation [ Upstream commit 9e4ce5220eedea2cc440f3961dec1b5122e815b2 ] This simplifies the private data allocation in the AXI SPI Engine driver by making use of the feature built into the spi_alloc_host() function instead of doing it manually. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20231117-axi-spi-engine-series-1-v1-3-cc59db999b87@baylibre.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 861578aa6ea12..492882213bb2f 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -473,15 +473,11 @@ static int spi_engine_probe(struct platform_device *pdev) if (irq <= 0) return -ENXIO; - spi_engine = devm_kzalloc(&pdev->dev, sizeof(*spi_engine), GFP_KERNEL); - if (!spi_engine) - return -ENOMEM; - - host = spi_alloc_host(&pdev->dev, 0); + host = spi_alloc_host(&pdev->dev, sizeof(*spi_engine)); if (!host) return -ENOMEM; - spi_controller_set_devdata(host, spi_engine); + spi_engine = spi_controller_get_devdata(host); spin_lock_init(&spi_engine->lock); -- GitLab From 657f21171357a62e485cde64db3ef2e221067a18 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 17 Nov 2023 14:12:55 -0600 Subject: [PATCH 2048/2290] spi: axi-spi-engine: use devm_spi_alloc_host() [ Upstream commit e12cd96e8e93044646fdf4b2c9a1de62cfa01e7c ] This modifies the AXI SPI Engine driver to use devm_spi_alloc_host() instead of spi_alloc_host() to simplify the code a bit. In addition to simplifying the error paths in the probe function, we can also remove spi_controller_get/put() calls in the remove function since devm_spi_alloc_host() sets a flag to no longer decrement the controller reference count in the spi_unregister_controller() function. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20231117-axi-spi-engine-series-1-v1-4-cc59db999b87@baylibre.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 492882213bb2f..69c4ff142baae 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -473,7 +473,7 @@ static int spi_engine_probe(struct platform_device *pdev) if (irq <= 0) return -ENXIO; - host = spi_alloc_host(&pdev->dev, sizeof(*spi_engine)); + host = devm_spi_alloc_host(&pdev->dev, sizeof(*spi_engine)); if (!host) return -ENOMEM; @@ -482,22 +482,16 @@ static int spi_engine_probe(struct platform_device *pdev) spin_lock_init(&spi_engine->lock); spi_engine->clk = devm_clk_get_enabled(&pdev->dev, "s_axi_aclk"); - if (IS_ERR(spi_engine->clk)) { - ret = PTR_ERR(spi_engine->clk); - goto err_put_host; - } + if (IS_ERR(spi_engine->clk)) + return PTR_ERR(spi_engine->clk); spi_engine->ref_clk = devm_clk_get_enabled(&pdev->dev, "spi_clk"); - if (IS_ERR(spi_engine->ref_clk)) { - ret = PTR_ERR(spi_engine->ref_clk); - goto err_put_host; - } + if (IS_ERR(spi_engine->ref_clk)) + return PTR_ERR(spi_engine->ref_clk); spi_engine->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(spi_engine->base)) { - ret = PTR_ERR(spi_engine->base); - goto err_put_host; - } + if (IS_ERR(spi_engine->base)) + return PTR_ERR(spi_engine->base); version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION); if (SPI_ENGINE_VERSION_MAJOR(version) != 1) { @@ -505,8 +499,7 @@ static int spi_engine_probe(struct platform_device *pdev) SPI_ENGINE_VERSION_MAJOR(version), SPI_ENGINE_VERSION_MINOR(version), SPI_ENGINE_VERSION_PATCH(version)); - ret = -ENODEV; - goto err_put_host; + return -ENODEV; } writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_RESET); @@ -515,7 +508,7 @@ static int spi_engine_probe(struct platform_device *pdev) ret = request_irq(irq, spi_engine_irq, 0, pdev->name, host); if (ret) - goto err_put_host; + return ret; host->dev.of_node = pdev->dev.of_node; host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE; @@ -533,14 +526,12 @@ static int spi_engine_probe(struct platform_device *pdev) return 0; err_free_irq: free_irq(irq, host); -err_put_host: - spi_controller_put(host); return ret; } static void spi_engine_remove(struct platform_device *pdev) { - struct spi_controller *host = spi_controller_get(platform_get_drvdata(pdev)); + struct spi_controller *host = platform_get_drvdata(pdev); struct spi_engine *spi_engine = spi_controller_get_devdata(host); int irq = platform_get_irq(pdev, 0); @@ -548,8 +539,6 @@ static void spi_engine_remove(struct platform_device *pdev) free_irq(irq, host); - spi_controller_put(host); - writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING); writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE); writel_relaxed(0x01, spi_engine->base + SPI_ENGINE_REG_RESET); -- GitLab From cde20c41508f97515cc80ddd376b2adf860a9d5b Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 17 Nov 2023 14:13:00 -0600 Subject: [PATCH 2049/2290] spi: axi-spi-engine: move msg state to new struct [ Upstream commit 7f970ecb77b6759d37ee743fc36fc0daba960e75 ] This moves the message state in the AXI SPI Engine driver to a new struct spi_engine_msg_state. Previously, the driver state contained various pointers that pointed to memory owned by a struct spi_message. However, it did not set any of these pointers to NULL when a message was completed. This could lead to use after free bugs. Example of how this could happen: 1. SPI core calls into spi_engine_transfer_one_message() with msg1. 2. Assume something was misconfigured and spi_engine_tx_next() is not called enough times in interrupt callbacks for msg1 such that spi_engine->tx_xfer is never set to NULL before the msg1 completes. 3. SYNC interrupt is received and spi_finalize_current_message() is called for msg1. spi_engine->msg is set to NULL but no other message-specific state is reset. 4. Caller that sent msg1 is notified of the completion and frees msg1 and the associated xfers and tx/rx buffers. 4. SPI core calls into spi_engine_transfer_one_message() with msg2. 5. When spi_engine_tx_next() is called for msg2, spi_engine->tx_xfer is still be pointing to an xfer from msg1, which was already freed. spi_engine_xfer_next() tries to access xfer->transfer_list of one of the freed xfers and we get a segfault or undefined behavior. To avoid issues like this, instead of putting per-message state in the driver state struct, we can make use of the struct spi_message::state field to store a pointer to a new struct spi_engine_msg_state. This way, all of the state that belongs to specific message stays with that message and we don't have to remember to manually reset all aspects of the message state when a message is completed. Rather, a new state is allocated for each message. Most of the changes are just renames where the state is accessed. One place where this wasn't straightforward was the sync_id member. This has been changed to use ida_alloc_range() since we needed to separate the per-message sync_id from the per-controller next available sync_id. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20231117-axi-spi-engine-series-1-v1-9-cc59db999b87@baylibre.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 150 ++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 54 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 69c4ff142baae..b75c1272de5f3 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -78,28 +79,42 @@ struct spi_engine_program { uint16_t instructions[]; }; -struct spi_engine { - struct clk *clk; - struct clk *ref_clk; - - spinlock_t lock; - - void __iomem *base; - - struct spi_message *msg; +/** + * struct spi_engine_message_state - SPI engine per-message state + */ +struct spi_engine_message_state { + /** Instructions for executing this message. */ struct spi_engine_program *p; + /** Number of elements in cmd_buf array. */ unsigned cmd_length; + /** Array of commands not yet written to CMD FIFO. */ const uint16_t *cmd_buf; - + /** Next xfer with tx_buf not yet fully written to TX FIFO. */ struct spi_transfer *tx_xfer; + /** Size of tx_buf in bytes. */ unsigned int tx_length; + /** Bytes not yet written to TX FIFO. */ const uint8_t *tx_buf; - + /** Next xfer with rx_buf not yet fully written to RX FIFO. */ struct spi_transfer *rx_xfer; + /** Size of tx_buf in bytes. */ unsigned int rx_length; + /** Bytes not yet written to the RX FIFO. */ uint8_t *rx_buf; + /** ID to correlate SYNC interrupts with this message. */ + u8 sync_id; +}; + +struct spi_engine { + struct clk *clk; + struct clk *ref_clk; - unsigned int sync_id; + spinlock_t lock; + + void __iomem *base; + + struct spi_message *msg; + struct ida sync_ida; unsigned int completed_id; unsigned int int_enable; @@ -258,100 +273,105 @@ static void spi_engine_xfer_next(struct spi_engine *spi_engine, static void spi_engine_tx_next(struct spi_engine *spi_engine) { - struct spi_transfer *xfer = spi_engine->tx_xfer; + struct spi_engine_message_state *st = spi_engine->msg->state; + struct spi_transfer *xfer = st->tx_xfer; do { spi_engine_xfer_next(spi_engine, &xfer); } while (xfer && !xfer->tx_buf); - spi_engine->tx_xfer = xfer; + st->tx_xfer = xfer; if (xfer) { - spi_engine->tx_length = xfer->len; - spi_engine->tx_buf = xfer->tx_buf; + st->tx_length = xfer->len; + st->tx_buf = xfer->tx_buf; } else { - spi_engine->tx_buf = NULL; + st->tx_buf = NULL; } } static void spi_engine_rx_next(struct spi_engine *spi_engine) { - struct spi_transfer *xfer = spi_engine->rx_xfer; + struct spi_engine_message_state *st = spi_engine->msg->state; + struct spi_transfer *xfer = st->rx_xfer; do { spi_engine_xfer_next(spi_engine, &xfer); } while (xfer && !xfer->rx_buf); - spi_engine->rx_xfer = xfer; + st->rx_xfer = xfer; if (xfer) { - spi_engine->rx_length = xfer->len; - spi_engine->rx_buf = xfer->rx_buf; + st->rx_length = xfer->len; + st->rx_buf = xfer->rx_buf; } else { - spi_engine->rx_buf = NULL; + st->rx_buf = NULL; } } static bool spi_engine_write_cmd_fifo(struct spi_engine *spi_engine) { void __iomem *addr = spi_engine->base + SPI_ENGINE_REG_CMD_FIFO; + struct spi_engine_message_state *st = spi_engine->msg->state; unsigned int n, m, i; const uint16_t *buf; n = readl_relaxed(spi_engine->base + SPI_ENGINE_REG_CMD_FIFO_ROOM); - while (n && spi_engine->cmd_length) { - m = min(n, spi_engine->cmd_length); - buf = spi_engine->cmd_buf; + while (n && st->cmd_length) { + m = min(n, st->cmd_length); + buf = st->cmd_buf; for (i = 0; i < m; i++) writel_relaxed(buf[i], addr); - spi_engine->cmd_buf += m; - spi_engine->cmd_length -= m; + st->cmd_buf += m; + st->cmd_length -= m; n -= m; } - return spi_engine->cmd_length != 0; + return st->cmd_length != 0; } static bool spi_engine_write_tx_fifo(struct spi_engine *spi_engine) { void __iomem *addr = spi_engine->base + SPI_ENGINE_REG_SDO_DATA_FIFO; + struct spi_engine_message_state *st = spi_engine->msg->state; unsigned int n, m, i; const uint8_t *buf; n = readl_relaxed(spi_engine->base + SPI_ENGINE_REG_SDO_FIFO_ROOM); - while (n && spi_engine->tx_length) { - m = min(n, spi_engine->tx_length); - buf = spi_engine->tx_buf; + while (n && st->tx_length) { + m = min(n, st->tx_length); + buf = st->tx_buf; for (i = 0; i < m; i++) writel_relaxed(buf[i], addr); - spi_engine->tx_buf += m; - spi_engine->tx_length -= m; + st->tx_buf += m; + st->tx_length -= m; n -= m; - if (spi_engine->tx_length == 0) + if (st->tx_length == 0) spi_engine_tx_next(spi_engine); } - return spi_engine->tx_length != 0; + return st->tx_length != 0; } static bool spi_engine_read_rx_fifo(struct spi_engine *spi_engine) { void __iomem *addr = spi_engine->base + SPI_ENGINE_REG_SDI_DATA_FIFO; + struct spi_engine_message_state *st = spi_engine->msg->state; unsigned int n, m, i; uint8_t *buf; n = readl_relaxed(spi_engine->base + SPI_ENGINE_REG_SDI_FIFO_LEVEL); - while (n && spi_engine->rx_length) { - m = min(n, spi_engine->rx_length); - buf = spi_engine->rx_buf; + while (n && st->rx_length) { + m = min(n, st->rx_length); + buf = st->rx_buf; for (i = 0; i < m; i++) buf[i] = readl_relaxed(addr); - spi_engine->rx_buf += m; - spi_engine->rx_length -= m; + st->rx_buf += m; + st->rx_length -= m; n -= m; - if (spi_engine->rx_length == 0) + if (st->rx_length == 0) spi_engine_rx_next(spi_engine); } - return spi_engine->rx_length != 0; + return st->rx_length != 0; } static irqreturn_t spi_engine_irq(int irq, void *devid) @@ -387,12 +407,16 @@ static irqreturn_t spi_engine_irq(int irq, void *devid) disable_int |= SPI_ENGINE_INT_SDI_ALMOST_FULL; } - if (pending & SPI_ENGINE_INT_SYNC) { - if (spi_engine->msg && - spi_engine->completed_id == spi_engine->sync_id) { + if (pending & SPI_ENGINE_INT_SYNC && spi_engine->msg) { + struct spi_engine_message_state *st = spi_engine->msg->state; + + if (spi_engine->completed_id == st->sync_id) { struct spi_message *msg = spi_engine->msg; + struct spi_engine_message_state *st = msg->state; - kfree(spi_engine->p); + ida_free(&spi_engine->sync_ida, st->sync_id); + kfree(st->p); + kfree(st); msg->status = 0; msg->actual_length = msg->frame_length; spi_engine->msg = NULL; @@ -417,29 +441,46 @@ static int spi_engine_transfer_one_message(struct spi_controller *host, { struct spi_engine_program p_dry, *p; struct spi_engine *spi_engine = spi_controller_get_devdata(host); + struct spi_engine_message_state *st; unsigned int int_enable = 0; unsigned long flags; size_t size; + int ret; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; p_dry.length = 0; spi_engine_compile_message(spi_engine, msg, true, &p_dry); size = sizeof(*p->instructions) * (p_dry.length + 1); p = kzalloc(sizeof(*p) + size, GFP_KERNEL); - if (!p) + if (!p) { + kfree(st); return -ENOMEM; + } + + ret = ida_alloc_range(&spi_engine->sync_ida, 0, U8_MAX, GFP_KERNEL); + if (ret < 0) { + kfree(p); + kfree(st); + return ret; + } + + st->sync_id = ret; + spi_engine_compile_message(spi_engine, msg, false, p); spin_lock_irqsave(&spi_engine->lock, flags); - spi_engine->sync_id = (spi_engine->sync_id + 1) & 0xff; - spi_engine_program_add_cmd(p, false, - SPI_ENGINE_CMD_SYNC(spi_engine->sync_id)); + spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(st->sync_id)); + msg->state = st; spi_engine->msg = msg; - spi_engine->p = p; + st->p = p; - spi_engine->cmd_buf = p->instructions; - spi_engine->cmd_length = p->length; + st->cmd_buf = p->instructions; + st->cmd_length = p->length; if (spi_engine_write_cmd_fifo(spi_engine)) int_enable |= SPI_ENGINE_INT_CMD_ALMOST_EMPTY; @@ -448,7 +489,7 @@ static int spi_engine_transfer_one_message(struct spi_controller *host, int_enable |= SPI_ENGINE_INT_SDO_ALMOST_EMPTY; spi_engine_rx_next(spi_engine); - if (spi_engine->rx_length != 0) + if (st->rx_length != 0) int_enable |= SPI_ENGINE_INT_SDI_ALMOST_FULL; int_enable |= SPI_ENGINE_INT_SYNC; @@ -480,6 +521,7 @@ static int spi_engine_probe(struct platform_device *pdev) spi_engine = spi_controller_get_devdata(host); spin_lock_init(&spi_engine->lock); + ida_init(&spi_engine->sync_ida); spi_engine->clk = devm_clk_get_enabled(&pdev->dev, "s_axi_aclk"); if (IS_ERR(spi_engine->clk)) -- GitLab From 0308cf64a17f65a25c6e150af30a91c903a49619 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 2 Feb 2024 15:31:32 -0600 Subject: [PATCH 2050/2290] spi: axi-spi-engine: use common AXI macros [ Upstream commit 88c2b56c2690061121cad03f0f551db465287575 ] This avoid duplicating the same macros in multiple drivers by reusing the common AXI macros for the version register. Signed-off-by: David Lechner Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240202213132.3863124-2-dlechner@baylibre.com Signed-off-by: Mark Brown Stable-dep-of: 0064db9ce4aa ("spi: axi-spi-engine: fix version format string") Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index b75c1272de5f3..719e4f7445361 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -14,12 +15,6 @@ #include #include -#define SPI_ENGINE_VERSION_MAJOR(x) ((x >> 16) & 0xff) -#define SPI_ENGINE_VERSION_MINOR(x) ((x >> 8) & 0xff) -#define SPI_ENGINE_VERSION_PATCH(x) (x & 0xff) - -#define SPI_ENGINE_REG_VERSION 0x00 - #define SPI_ENGINE_REG_RESET 0x40 #define SPI_ENGINE_REG_INT_ENABLE 0x80 @@ -535,12 +530,12 @@ static int spi_engine_probe(struct platform_device *pdev) if (IS_ERR(spi_engine->base)) return PTR_ERR(spi_engine->base); - version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION); - if (SPI_ENGINE_VERSION_MAJOR(version) != 1) { + version = readl(spi_engine->base + ADI_AXI_REG_VERSION); + if (ADI_AXI_PCORE_VER_MAJOR(version) != 1) { dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n", - SPI_ENGINE_VERSION_MAJOR(version), - SPI_ENGINE_VERSION_MINOR(version), - SPI_ENGINE_VERSION_PATCH(version)); + ADI_AXI_PCORE_VER_MAJOR(version), + ADI_AXI_PCORE_VER_MINOR(version), + ADI_AXI_PCORE_VER_PATCH(version)); return -ENODEV; } -- GitLab From 5d6e336b9ed0b375a4ab2c5c69b47d4944c987cc Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 12 Apr 2024 17:52:48 -0500 Subject: [PATCH 2051/2290] spi: axi-spi-engine: fix version format string [ Upstream commit 0064db9ce4aa7cc794e6f4aed60dee0f94fc9bcf ] The version format string in the AXI SPI Engine driver was probably intended to print the version number in the same format as the DT compatible string (e.g. 1.00.a). However, the version just uses semantic versioning so formatting the patch number as a character is not correct and would result in printing control characters for patch numbers less than 32. Fixes: b1353d1c1d45 ("spi: Add Analog Devices AXI SPI Engine controller support") Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20240412-axi-spi-engine-version-printf-v1-1-95e1e842c1a6@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-axi-spi-engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 719e4f7445361..a5f0a61b266f1 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -532,7 +532,7 @@ static int spi_engine_probe(struct platform_device *pdev) version = readl(spi_engine->base + ADI_AXI_REG_VERSION); if (ADI_AXI_PCORE_VER_MAJOR(version) != 1) { - dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n", + dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%u\n", ADI_AXI_PCORE_VER_MAJOR(version), ADI_AXI_PCORE_VER_MINOR(version), ADI_AXI_PCORE_VER_PATCH(version)); -- GitLab From d97e7ab8c80b71d7e41c62228554027bee888a0b Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 16 Apr 2024 09:58:39 +0800 Subject: [PATCH 2052/2290] spi: hisi-kunpeng: Delete the dump interface of data registers in debugfs [ Upstream commit 7430764f5a85d30314aeef2d5438dff1fb0b1d68 ] Due to the reading of FIFO during the dump of data registers in debugfs, if SPI transmission is in progress, it will be affected and may result in transmission failure. Therefore, the dump interface of data registers in debugfs is removed. Fixes: 2b2142f247eb ("spi: hisi-kunpeng: Add debugfs support") Signed-off-by: Devyn Liu Reviewed-by: Jay Fang Link: https://lore.kernel.org/r/20240416015839.3323398-1-liudingyuan@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-hisi-kunpeng.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 525cc0143a305..54730e93fba45 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -151,8 +151,6 @@ static const struct debugfs_reg32 hisi_spi_regs[] = { HISI_SPI_DBGFS_REG("ENR", HISI_SPI_ENR), HISI_SPI_DBGFS_REG("FIFOC", HISI_SPI_FIFOC), HISI_SPI_DBGFS_REG("IMR", HISI_SPI_IMR), - HISI_SPI_DBGFS_REG("DIN", HISI_SPI_DIN), - HISI_SPI_DBGFS_REG("DOUT", HISI_SPI_DOUT), HISI_SPI_DBGFS_REG("SR", HISI_SPI_SR), HISI_SPI_DBGFS_REG("RISR", HISI_SPI_RISR), HISI_SPI_DBGFS_REG("ISR", HISI_SPI_ISR), -- GitLab From 67a8dbe10bb9b1e38542305f1a606754a89ad263 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 16 Apr 2024 14:42:07 +0800 Subject: [PATCH 2053/2290] bpf, arm64: Fix incorrect runtime stats [ Upstream commit dc7d7447b56bcc9cf79a9c22e4edad200a298e4c ] When __bpf_prog_enter() returns zero, the arm64 register x20 that stores prog start time is not assigned to zero, causing incorrect runtime stats. To fix it, assign the return value of bpf_prog_enter() to x20 register immediately upon its return. Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Reported-by: Ivan Babrou Signed-off-by: Xu Kuohai Signed-off-by: Daniel Borkmann Tested-by: Ivan Babrou Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240416064208.2919073-2-xukuohai@huaweicloud.com Signed-off-by: Sasha Levin --- arch/arm64/net/bpf_jit_comp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 0ce5f13eabb1b..afb79209d4132 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1679,15 +1679,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, emit_call(enter_prog, ctx); + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ branch = ctx->image + ctx->idx; emit(A64_NOP, ctx); - /* save return value to callee saved register x20 */ - emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); - emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); if (!p->jited) emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); -- GitLab From 5dbc158805d9d96dc3f539e40887b4105b8476d6 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 16 Apr 2024 13:42:19 +0200 Subject: [PATCH 2054/2290] s390/mm: Fix storage key clearing for guest huge pages [ Upstream commit 843c3280686fc1a83d89ee1e0b5599c9f6b09d0c ] The function __storage_key_init_range() expects the end address to be the first byte outside the range to be initialized. I.e. end - start should be the size of the area to be initialized. The current code works because __storage_key_init_range() will still loop over every page in the range, but it is slower than using sske_frame(). Fixes: 964c2c05c9f3 ("s390/mm: Clear huge page storage keys on enable_skey") Reviewed-by: Heiko Carstens Signed-off-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20240416114220.28489-2-imbrenda@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/mm/gmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 662cf23a1b44b..59657e0363e7c 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2642,7 +2642,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, return 0; start = pmd_val(*pmd) & HPAGE_MASK; - end = start + HPAGE_SIZE - 1; + end = start + HPAGE_SIZE; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); cond_resched(); -- GitLab From a4b30f548aca6378b3346fa65ca363a81b37fd85 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 16 Apr 2024 13:42:20 +0200 Subject: [PATCH 2055/2290] s390/mm: Fix clearing storage keys for huge pages [ Upstream commit 412050af2ea39407fe43324b0be4ab641530ce88 ] The function __storage_key_init_range() expects the end address to be the first byte outside the range to be initialized. I.e. end - start should be the size of the area to be initialized. The current code works because __storage_key_init_range() will still loop over every page in the range, but it is slower than using sske_frame(). Fixes: 3afdfca69870 ("s390/mm: Clear skeys for newly mapped huge guest pmds") Reviewed-by: Heiko Carstens Signed-off-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20240416114220.28489-3-imbrenda@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c299a18273ffe..33ef6790114ab 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -139,7 +139,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) } if (!test_and_set_bit(PG_arch_1, &page->flags)) - __storage_key_init_range(paddr, paddr + size - 1); + __storage_key_init_range(paddr, paddr + size); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, -- GitLab From 272bfb019f3cc018f654b992115774e77b4f3ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 18 Apr 2024 09:18:39 +0200 Subject: [PATCH 2056/2290] xdp: use flags field to disambiguate broadcast redirect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5bcf0dcbf9066348058b88a510c57f70f384c92c ] When redirecting a packet using XDP, the bpf_redirect_map() helper will set up the redirect destination information in struct bpf_redirect_info (using the __bpf_xdp_redirect_map() helper function), and the xdp_do_redirect() function will read this information after the XDP program returns and pass the frame on to the right redirect destination. When using the BPF_F_BROADCAST flag to do multicast redirect to a whole map, __bpf_xdp_redirect_map() sets the 'map' pointer in struct bpf_redirect_info to point to the destination map to be broadcast. And xdp_do_redirect() reacts to the value of this map pointer to decide whether it's dealing with a broadcast or a single-value redirect. However, if the destination map is being destroyed before xdp_do_redirect() is called, the map pointer will be cleared out (by bpf_clear_redirect_map()) without waiting for any XDP programs to stop running. This causes xdp_do_redirect() to think that the redirect was to a single target, but the target pointer is also NULL (since broadcast redirects don't have a single target), so this causes a crash when a NULL pointer is passed to dev_map_enqueue(). To fix this, change xdp_do_redirect() to react directly to the presence of the BPF_F_BROADCAST flag in the 'flags' value in struct bpf_redirect_info to disambiguate between a single-target and a broadcast redirect. And only read the 'map' pointer if the broadcast flag is set, aborting if that has been cleared out in the meantime. This prevents the crash, while keeping the atomic (cmpxchg-based) clearing of the map pointer itself, and without adding any more checks in the non-broadcast fast path. Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support") Reported-and-tested-by: syzbot+af9492708df9797198d6@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Reviewed-by: Hangbin Liu Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20240418071840.156411-1-toke@redhat.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- net/core/filter.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index cb7c4651eaec8..1d8b271ef8cc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4244,10 +4244,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (unlikely(!xdpf)) { @@ -4259,11 +4261,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_enqueue_multi(xdpf, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_enqueue(fwd, xdpf, dev); } @@ -4334,9 +4345,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4346,11 +4357,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4387,9 +4407,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4409,7 +4431,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; -- GitLab From 06cb37e2ba6441888f24566a997481d4197b4e32 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:19 +0700 Subject: [PATCH 2057/2290] bna: ensure the copied buf is NUL terminated [ Upstream commit 8c34096c7fdf272fd4c0c37fe411cd2e3ed0ee9f ] Currently, we allocate a nbytes-sized kernel buffer and copy nbytes from userspace to that buffer. Later, we use sscanf on this buffer but we don't ensure that the string is terminated inside the buffer, this can lead to OOB read when using sscanf. Fix this issue by using memdup_user_nul instead of memdup_user. Fixes: 7afc5dbde091 ("bna: Add debugfs interface.") Signed-off-by: Bui Quang Minh Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-2-f1f1b53a10f4@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 04ad0f2b9677e..777f0d7e48192 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -312,7 +312,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); @@ -372,7 +372,7 @@ bnad_debugfs_write_regwr(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); -- GitLab From 8f11fe3ea3fc261640cfc8a5addd838000407c67 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:23 +0700 Subject: [PATCH 2058/2290] octeontx2-af: avoid off-by-one read from userspace [ Upstream commit f299ee709fb45036454ca11e90cb2810fe771878 ] We try to access count + 1 byte from userspace with memdup_user(buffer, count + 1). However, the userspace only provides buffer of count bytes and only these count bytes are verified to be okay to access. To ensure the copied buffer is NUL terminated, we use memdup_user_nul instead. Fixes: 3a2eb515d136 ("octeontx2-af: Fix an off by one in rvu_dbg_qsize_write()") Signed-off-by: Bui Quang Minh Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-6-f1f1b53a10f4@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index cc5d342e026c7..a3c1d82032f55 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -999,12 +999,10 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, u16 pcifunc; int ret, lf; - cmd_buf = memdup_user(buffer, count + 1); + cmd_buf = memdup_user_nul(buffer, count); if (IS_ERR(cmd_buf)) return -ENOMEM; - cmd_buf[count] = '\0'; - cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { *cmd_buf_tmp = '\0'; -- GitLab From 37ed6f244ec5bda2e90b085084e322ea55d0aaa2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Apr 2024 19:35:49 -0700 Subject: [PATCH 2059/2290] nsh: Restore skb->{protocol,data,mac_header} for outer header in nsh_gso_segment(). [ Upstream commit 4b911a9690d72641879ea6d13cce1de31d346d79 ] syzbot triggered various splats (see [0] and links) by a crafted GSO packet of VIRTIO_NET_HDR_GSO_UDP layering the following protocols: ETH_P_8021AD + ETH_P_NSH + ETH_P_IPV6 + IPPROTO_UDP NSH can encapsulate IPv4, IPv6, Ethernet, NSH, and MPLS. As the inner protocol can be Ethernet, NSH GSO handler, nsh_gso_segment(), calls skb_mac_gso_segment() to invoke inner protocol GSO handlers. nsh_gso_segment() does the following for the original skb before calling skb_mac_gso_segment() 1. reset skb->network_header 2. save the original skb->{mac_heaeder,mac_len} in a local variable 3. pull the NSH header 4. resets skb->mac_header 5. set up skb->mac_len and skb->protocol for the inner protocol. and does the following for the segmented skb 6. set ntohs(ETH_P_NSH) to skb->protocol 7. push the NSH header 8. restore skb->mac_header 9. set skb->mac_header + mac_len to skb->network_header 10. restore skb->mac_len There are two problems in 6-7 and 8-9. (a) After 6 & 7, skb->data points to the NSH header, so the outer header (ETH_P_8021AD in this case) is stripped when skb is sent out of netdev. Also, if NSH is encapsulated by NSH + Ethernet (so NSH-Ethernet-NSH), skb_pull() in the first nsh_gso_segment() will make skb->data point to the middle of the outer NSH or Ethernet header because the Ethernet header is not pulled by the second nsh_gso_segment(). (b) While restoring skb->{mac_header,network_header} in 8 & 9, nsh_gso_segment() does not assume that the data in the linear buffer is shifted. However, udp6_ufo_fragment() could shift the data and change skb->mac_header accordingly as demonstrated by syzbot. If this happens, even the restored skb->mac_header points to the middle of the outer header. It seems nsh_gso_segment() has never worked with outer headers so far. At the end of nsh_gso_segment(), the outer header must be restored for the segmented skb, instead of the NSH header. To do that, let's calculate the outer header position relatively from the inner header and set skb->{data,mac_header,protocol} properly. [0]: BUG: KMSAN: uninit-value in ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:524 [inline] BUG: KMSAN: uninit-value in ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] BUG: KMSAN: uninit-value in ipvlan_queue_xmit+0xf44/0x16b0 drivers/net/ipvlan/ipvlan_core.c:668 ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:524 [inline] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] ipvlan_queue_xmit+0xf44/0x16b0 drivers/net/ipvlan/ipvlan_core.c:668 ipvlan_start_xmit+0x5c/0x1a0 drivers/net/ipvlan/ipvlan_main.c:222 __netdev_start_xmit include/linux/netdevice.h:4989 [inline] netdev_start_xmit include/linux/netdevice.h:5003 [inline] xmit_one net/core/dev.c:3547 [inline] dev_hard_start_xmit+0x244/0xa10 net/core/dev.c:3563 __dev_queue_xmit+0x33ed/0x51c0 net/core/dev.c:4351 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] __do_kmalloc_node mm/slub.c:3980 [inline] __kmalloc_node_track_caller+0x705/0x1000 mm/slub.c:4001 kmalloc_reserve+0x249/0x4a0 net/core/skbuff.c:582 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 skb_segment+0x20aa/0x7080 net/core/skbuff.c:4647 udp6_ufo_fragment+0xcab/0x1150 net/ipv6/udp_offload.c:109 ipv6_gso_segment+0x14be/0x2ca0 net/ipv6/ip6_offload.c:152 skb_mac_gso_segment+0x3e8/0x760 net/core/gso.c:53 nsh_gso_segment+0x6f4/0xf70 net/nsh/nsh.c:108 skb_mac_gso_segment+0x3e8/0x760 net/core/gso.c:53 __skb_gso_segment+0x4b0/0x730 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x107f/0x1930 net/core/dev.c:3628 __dev_queue_xmit+0x1f28/0x51c0 net/core/dev.c:4343 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 1 PID: 5101 Comm: syz-executor421 Not tainted 6.8.0-rc5-syzkaller-00297-gf2e367d6ad3b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Fixes: c411ed854584 ("nsh: add GSO support") Reported-and-tested-by: syzbot+42a0dc856239de4de60e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=42a0dc856239de4de60e Reported-and-tested-by: syzbot+c298c9f0e46a3c86332b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c298c9f0e46a3c86332b Link: https://lore.kernel.org/netdev/20240415222041.18537-1-kuniyu@amazon.com/ Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424023549.21862-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/nsh/nsh.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 0f23e5e8e03eb..3e0fc71d95a14 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -76,13 +76,15 @@ EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { + unsigned int outer_hlen, mac_len, nsh_len; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; - unsigned int nsh_len, mac_len; - __be16 proto; + __be16 outer_proto, proto; skb_reset_network_header(skb); + outer_proto = skb->protocol; + outer_hlen = skb_mac_header_len(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -112,10 +114,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, } for (skb = segs; skb; skb = skb->next) { - skb->protocol = htons(ETH_P_NSH); - __skb_push(skb, nsh_len); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; + skb->protocol = outer_proto; + __skb_push(skb, nsh_len + outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, outer_hlen); skb->mac_len = mac_len; } -- GitLab From 39a055e607cb80688a26b68a53deaeea5c02c6e9 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Wed, 24 Apr 2024 19:11:10 +0200 Subject: [PATCH 2060/2290] net l2tp: drop flow hash on forward [ Upstream commit 42f853b42899d9b445763b55c3c8adc72be0f0e1 ] Drop the flow-hash of the skb when forwarding to the L2TP netdev. This avoids the L2TP qdisc from using the flow-hash from the outer packet, which is identical for every flow within the tunnel. This does not affect every platform but is specific for the ethernet driver. It depends on the platform including L4 information in the flow-hash. One such example is the Mediatek Filogic MT798x family of networking processors. Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Acked-by: James Chapman Signed-off-by: David Bauer Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240424171110.13701-1-mail@david-bauer.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/l2tp/l2tp_eth.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f2ae03c404736..1f41d2f3b8c4e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -136,6 +136,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, /* checksums verified by L2TP */ skb->ip_summed = CHECKSUM_NONE; + /* drop outer flow-hash */ + skb_clear_hash(skb); + skb_dst_drop(skb); nf_reset_ct(skb); -- GitLab From 160e19b95b30e6211a506b2b3ad7327482d6109c Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Tue, 23 Apr 2024 17:35:52 +0200 Subject: [PATCH 2061/2290] s390/vdso: Add CFI for RA register to asm macro vdso_func [ Upstream commit b961ec10b9f9719987470236feb50c967db5a652 ] The return-address (RA) register r14 is specified as volatile in the s390x ELF ABI [1]. Nevertheless proper CFI directives must be provided for an unwinder to restore the return address, if the RA register value is changed from its value at function entry, as it is the case. [1]: s390x ELF ABI, https://github.com/IBM/s390x-abi/releases Fixes: 4bff8cb54502 ("s390: convert to GENERIC_VDSO") Signed-off-by: Jens Remus Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/include/asm/dwarf.h | 1 + arch/s390/kernel/vdso64/vdso_user_wrapper.S | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h index 4f21ae561e4dd..390906b8e386e 100644 --- a/arch/s390/include/asm/dwarf.h +++ b/arch/s390/include/asm/dwarf.h @@ -9,6 +9,7 @@ #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset #define CFI_RESTORE .cfi_restore +#define CFI_REL_OFFSET .cfi_rel_offset #ifdef CONFIG_AS_CFI_VAL_OFFSET #define CFI_VAL_OFFSET .cfi_val_offset diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index 97f0c0a669a59..0625381359df4 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -23,8 +23,10 @@ __kernel_\func: CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE) CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD stg %r14,STACK_FRAME_OVERHEAD(%r15) + CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD brasl %r14,__s390_vdso_\func lg %r14,STACK_FRAME_OVERHEAD(%r15) + CFI_RESTORE 14 aghi %r15,WRAPPER_FRAME_SIZE CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 -- GitLab From 3b588a16ac01f8057ebd8849dfd11e876b9a0d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:23 +0000 Subject: [PATCH 2062/2290] net: qede: sanitize 'rc' in qede_add_tc_flower_fltr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e25714466abd9d96901b15efddf82c60a38abd86 ] Explicitly set 'rc' (return code), before jumping to the unlock and return path. By not having any code depend on that 'rc' remains at it's initial value of -EINVAL, then we can re-use 'rc' for the return code of function calls in subsequent patches. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller Stable-dep-of: fcee2065a178 ("net: qede: use return from qede_parse_flow_attr() for flower") Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 3010833ddde33..76aa5934e985b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1868,8 +1868,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; - int min_hlen, rc = -EINVAL; struct qede_arfs_tuple t; + int min_hlen, rc; __qede_lock(edev); @@ -1879,8 +1879,10 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) + if (qede_parse_flow_attr(edev, proto, f->rule, &t)) { + rc = -EINVAL; goto unlock; + } /* Validate profile mode and number of filters */ if ((edev->arfs->filter_count && edev->arfs->mode != t.mode) || @@ -1888,12 +1890,15 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, DP_NOTICE(edev, "Filter configuration invalidated, filter mode=0x%x, configured mode=0x%x, filter count=0x%x\n", t.mode, edev->arfs->mode, edev->arfs->filter_count); + rc = -EINVAL; goto unlock; } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) + if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) { + rc = -EINVAL; goto unlock; + } if (qede_flow_find_fltr(edev, &t)) { rc = -EEXIST; -- GitLab From de5f3a63a95ea1d2b39043c7888b3bb7bde7040f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:24 +0000 Subject: [PATCH 2063/2290] net: qede: use return from qede_parse_flow_attr() for flower MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fcee2065a178f78be6fd516302830378b17dba3d ] In qede_add_tc_flower_fltr(), when calling qede_parse_flow_attr() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_flow_attr() can currently fail with: * -EINVAL * -EOPNOTSUPP * -EPROTONOSUPPORT This patch changes the code to use the actual return code, not just return -EINVAL. The blaimed commit introduced these functions. Only compile tested. Fixes: 2ce9c93eaca6 ("qede: Ingress tc flower offload (drop action) support.") Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 76aa5934e985b..aedb98713bbf2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1879,10 +1879,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) { - rc = -EINVAL; + rc = qede_parse_flow_attr(edev, proto, f->rule, &t); + if (rc) goto unlock; - } /* Validate profile mode and number of filters */ if ((edev->arfs->filter_count && edev->arfs->mode != t.mode) || -- GitLab From 3f4a70e2ffd0580879efaf0a5aec334b0b5bed2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:25 +0000 Subject: [PATCH 2064/2290] net: qede: use return from qede_parse_flow_attr() for flow_spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 27b44414a34b108c5a37cd5b4894f606061d86e7 ] In qede_flow_spec_to_rule(), when calling qede_parse_flow_attr() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_flow_attr() can currently fail with: * -EINVAL * -EOPNOTSUPP * -EPROTONOSUPPORT This patch changes the code to use the actual return code, not just return -EINVAL. The blaimed commit introduced qede_flow_spec_to_rule(), and this call to qede_parse_flow_attr(), it looks like it just duplicated how it was already used. Only compile tested. Fixes: 37c5d3efd7f8 ("qede: use ethtool_rx_flow_rule() to remove duplicated parser code") Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index aedb98713bbf2..aeff091cdfaee 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2002,10 +2002,9 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, if (IS_ERR(flow)) return PTR_ERR(flow); - if (qede_parse_flow_attr(edev, proto, flow->rule, t)) { - err = -EINVAL; + err = qede_parse_flow_attr(edev, proto, flow->rule, t); + if (err) goto err_out; - } /* Make sure location is valid and filter isn't already set */ err = qede_flow_spec_validate(edev, &flow->rule->action, t, -- GitLab From 52f6ac863942890520f9b9e17f757aaed1a8479c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:26 +0000 Subject: [PATCH 2065/2290] net: qede: use return from qede_parse_actions() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f26f719a36e56381a1f4230e5364e7ad4d485888 ] When calling qede_parse_actions() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_actions() can currently fail with: * -EINVAL * -EOPNOTSUPP This patch changes the code to use the actual return code, not just return -EINVAL. The blaimed commit broke the implicit assumption that only -EINVAL would ever be returned. Only compile tested. Fixes: 319a1d19471e ("flow_offload: check for basic action hw stats type") Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index aeff091cdfaee..8871099b99d8a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1894,10 +1894,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) { - rc = -EINVAL; + rc = qede_parse_actions(edev, &f->rule->action, f->common.extack); + if (rc) goto unlock; - } if (qede_flow_find_fltr(edev, &t)) { rc = -EEXIST; -- GitLab From 821b719884248e4f3878adc7605737746cfd8c14 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 27 Feb 2024 16:08:25 +0100 Subject: [PATCH 2066/2290] ASoC: meson: axg-fifo: use FIELD helpers [ Upstream commit 9e6f39535c794adea6ba802a52c722d193c28124 ] Use FIELD_GET() and FIELD_PREP() helpers instead of doing it manually. Signed-off-by: Jerome Brunet Link: https://msgid.link/r/20240227150826.573581-1-jbrunet@baylibre.com Signed-off-by: Mark Brown Stable-dep-of: b11d26660dff ("ASoC: meson: axg-fifo: use threaded irq to check periods") Signed-off-by: Sasha Levin --- sound/soc/meson/axg-fifo.c | 25 +++++++++++++------------ sound/soc/meson/axg-fifo.h | 12 +++++------- sound/soc/meson/axg-frddr.c | 5 +++-- sound/soc/meson/axg-toddr.c | 22 ++++++++++------------ 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index bccfb770b3391..bde7598750064 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -3,6 +3,7 @@ // Copyright (c) 2018 BayLibre, SAS. // Author: Jerome Brunet +#include #include #include #include @@ -145,8 +146,8 @@ int axg_fifo_pcm_hw_params(struct snd_soc_component *component, /* Enable irq if necessary */ irq_en = runtime->no_period_wakeup ? 0 : FIFO_INT_COUNT_REPEAT; regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT), - CTRL0_INT_EN(irq_en)); + CTRL0_INT_EN, + FIELD_PREP(CTRL0_INT_EN, irq_en)); return 0; } @@ -176,9 +177,9 @@ int axg_fifo_pcm_hw_free(struct snd_soc_component *component, { struct axg_fifo *fifo = axg_fifo_data(ss); - /* Disable the block count irq */ + /* Disable irqs */ regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT), 0); + CTRL0_INT_EN, 0); return 0; } @@ -187,13 +188,13 @@ EXPORT_SYMBOL_GPL(axg_fifo_pcm_hw_free); static void axg_fifo_ack_irq(struct axg_fifo *fifo, u8 mask) { regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_INT_CLR(FIFO_INT_MASK), - CTRL1_INT_CLR(mask)); + CTRL1_INT_CLR, + FIELD_PREP(CTRL1_INT_CLR, mask)); /* Clear must also be cleared */ regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_INT_CLR(FIFO_INT_MASK), - 0); + CTRL1_INT_CLR, + FIELD_PREP(CTRL1_INT_CLR, 0)); } static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) @@ -204,7 +205,7 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) regmap_read(fifo->map, FIFO_STATUS1, &status); - status = STATUS1_INT_STS(status) & FIFO_INT_MASK; + status = FIELD_GET(STATUS1_INT_STS, status); if (status & FIFO_INT_COUNT_REPEAT) snd_pcm_period_elapsed(ss); else @@ -254,15 +255,15 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, /* Setup status2 so it reports the memory pointer */ regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_STATUS2_SEL_MASK, - CTRL1_STATUS2_SEL(STATUS2_SEL_DDR_READ)); + CTRL1_STATUS2_SEL, + FIELD_PREP(CTRL1_STATUS2_SEL, STATUS2_SEL_DDR_READ)); /* Make sure the dma is initially disabled */ __dma_enable(fifo, false); /* Disable irqs until params are ready */ regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_INT_EN(FIFO_INT_MASK), 0); + CTRL0_INT_EN, 0); /* Clear any pending interrupt */ axg_fifo_ack_irq(fifo, FIFO_INT_MASK); diff --git a/sound/soc/meson/axg-fifo.h b/sound/soc/meson/axg-fifo.h index b63acd723c870..5b7d32c37991b 100644 --- a/sound/soc/meson/axg-fifo.h +++ b/sound/soc/meson/axg-fifo.h @@ -42,21 +42,19 @@ struct snd_soc_pcm_runtime; #define FIFO_CTRL0 0x00 #define CTRL0_DMA_EN BIT(31) -#define CTRL0_INT_EN(x) ((x) << 16) +#define CTRL0_INT_EN GENMASK(23, 16) #define CTRL0_SEL_MASK GENMASK(2, 0) #define CTRL0_SEL_SHIFT 0 #define FIFO_CTRL1 0x04 -#define CTRL1_INT_CLR(x) ((x) << 0) -#define CTRL1_STATUS2_SEL_MASK GENMASK(11, 8) -#define CTRL1_STATUS2_SEL(x) ((x) << 8) +#define CTRL1_INT_CLR GENMASK(7, 0) +#define CTRL1_STATUS2_SEL GENMASK(11, 8) #define STATUS2_SEL_DDR_READ 0 -#define CTRL1_FRDDR_DEPTH_MASK GENMASK(31, 24) -#define CTRL1_FRDDR_DEPTH(x) ((x) << 24) +#define CTRL1_FRDDR_DEPTH GENMASK(31, 24) #define FIFO_START_ADDR 0x08 #define FIFO_FINISH_ADDR 0x0c #define FIFO_INT_ADDR 0x10 #define FIFO_STATUS1 0x14 -#define STATUS1_INT_STS(x) ((x) << 0) +#define STATUS1_INT_STS GENMASK(7, 0) #define FIFO_STATUS2 0x18 #define FIFO_INIT_ADDR 0x24 #define FIFO_CTRL2 0x28 diff --git a/sound/soc/meson/axg-frddr.c b/sound/soc/meson/axg-frddr.c index 61f9d417fd608..f0a9e181ee72a 100644 --- a/sound/soc/meson/axg-frddr.c +++ b/sound/soc/meson/axg-frddr.c @@ -7,6 +7,7 @@ * This driver implements the frontend playback DAI of AXG and G12A based SoCs */ +#include #include #include #include @@ -59,8 +60,8 @@ static int axg_frddr_dai_hw_params(struct snd_pcm_substream *substream, /* Trim the FIFO depth if the period is small to improve latency */ depth = min(period, fifo->depth); val = (depth / AXG_FIFO_BURST) - 1; - regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH_MASK, - CTRL1_FRDDR_DEPTH(val)); + regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH, + FIELD_PREP(CTRL1_FRDDR_DEPTH, val)); return 0; } diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c index e9208e74e9659..f875304463e2f 100644 --- a/sound/soc/meson/axg-toddr.c +++ b/sound/soc/meson/axg-toddr.c @@ -5,6 +5,7 @@ /* This driver implements the frontend capture DAI of AXG based SoCs */ +#include #include #include #include @@ -19,12 +20,9 @@ #define CTRL0_TODDR_EXT_SIGNED BIT(29) #define CTRL0_TODDR_PP_MODE BIT(28) #define CTRL0_TODDR_SYNC_CH BIT(27) -#define CTRL0_TODDR_TYPE_MASK GENMASK(15, 13) -#define CTRL0_TODDR_TYPE(x) ((x) << 13) -#define CTRL0_TODDR_MSB_POS_MASK GENMASK(12, 8) -#define CTRL0_TODDR_MSB_POS(x) ((x) << 8) -#define CTRL0_TODDR_LSB_POS_MASK GENMASK(7, 3) -#define CTRL0_TODDR_LSB_POS(x) ((x) << 3) +#define CTRL0_TODDR_TYPE GENMASK(15, 13) +#define CTRL0_TODDR_MSB_POS GENMASK(12, 8) +#define CTRL0_TODDR_LSB_POS GENMASK(7, 3) #define CTRL1_TODDR_FORCE_FINISH BIT(25) #define CTRL1_SEL_SHIFT 28 @@ -76,12 +74,12 @@ static int axg_toddr_dai_hw_params(struct snd_pcm_substream *substream, width = params_width(params); regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_TODDR_TYPE_MASK | - CTRL0_TODDR_MSB_POS_MASK | - CTRL0_TODDR_LSB_POS_MASK, - CTRL0_TODDR_TYPE(type) | - CTRL0_TODDR_MSB_POS(TODDR_MSB_POS) | - CTRL0_TODDR_LSB_POS(TODDR_MSB_POS - (width - 1))); + CTRL0_TODDR_TYPE | + CTRL0_TODDR_MSB_POS | + CTRL0_TODDR_LSB_POS, + FIELD_PREP(CTRL0_TODDR_TYPE, type) | + FIELD_PREP(CTRL0_TODDR_MSB_POS, TODDR_MSB_POS) | + FIELD_PREP(CTRL0_TODDR_LSB_POS, TODDR_MSB_POS - (width - 1))); return 0; } -- GitLab From d41a1d5c4592ff70ee99bd9ba943b9a986f22edd Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:38 +0200 Subject: [PATCH 2067/2290] ASoC: meson: axg-fifo: use threaded irq to check periods [ Upstream commit b11d26660dff8d7430892008616452dc8e5fb0f3 ] With the AXG audio subsystem, there is a possible random channel shift on TDM capture, when the slot number per lane is more than 2, and there is more than one lane used. The problem has been there since the introduction of the axg audio support but such scenario is pretty uncommon. This is why there is no loud complains about the problem. Solving the problem require to make the links non-atomic and use the trigger() callback to start FEs and BEs in the appropriate order. This was tried in the past and reverted because it caused the block irq to sleep while atomic. However, instead of reverting, the solution is to call snd_pcm_period_elapsed() in a non atomic context. Use the bottom half of a threaded IRQ to do so. Fixes: 6dc4fa179fb8 ("ASoC: meson: add axg fifo base driver") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-2-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-fifo.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index bde7598750064..94b169a5493b5 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -204,18 +204,26 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) unsigned int status; regmap_read(fifo->map, FIFO_STATUS1, &status); - status = FIELD_GET(STATUS1_INT_STS, status); + axg_fifo_ack_irq(fifo, status); + + /* Use the thread to call period elapsed on nonatomic links */ if (status & FIFO_INT_COUNT_REPEAT) - snd_pcm_period_elapsed(ss); - else - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + return IRQ_WAKE_THREAD; - /* Ack irqs */ - axg_fifo_ack_irq(fifo, status); + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); + + return IRQ_NONE; +} + +static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) +{ + struct snd_pcm_substream *ss = dev_id; + + snd_pcm_period_elapsed(ss); - return IRQ_RETVAL(status); + return IRQ_HANDLED; } int axg_fifo_pcm_open(struct snd_soc_component *component, @@ -243,8 +251,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_irq(fifo->irq, axg_fifo_pcm_irq_block, 0, - dev_name(dev), ss); + ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, + axg_fifo_pcm_irq_block_thread, + IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; -- GitLab From f0f8ec97ac2ec8478006d7efad30259902dabe24 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:39 +0200 Subject: [PATCH 2068/2290] ASoC: meson: axg-card: make links nonatomic [ Upstream commit dcba52ace7d4c12e2c8c273eff55ea03a84c8baf ] Non atomic operations need to be performed in the trigger callback of the TDM interfaces. Those are BEs but what matters is the nonatomic flag of the FE in the DPCM context. Just set nonatomic for everything so, at least, what is done is clear. Fixes: 7864a79f37b5 ("ASoC: meson: add axg sound card support") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-3-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-card.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index 2b77010c2c5ce..cbbaa55d92a66 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -320,6 +320,7 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; + dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node, &dai_link->cpus->dai_name); -- GitLab From 46071eeb0ba8ceb1b09a53f70fae8208cd42b543 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:40 +0200 Subject: [PATCH 2069/2290] ASoC: meson: axg-tdm-interface: manage formatters in trigger [ Upstream commit f949ed458ad15a00d41b37c745ebadaef171aaae ] So far, the formatters have been reset/enabled using the .prepare() callback. This was done in this callback because walking the formatters use a mutex. A mutex is used because formatter handling require dealing possibly slow clock operation. With the support of non-atomic, .trigger() callback may be used which also allows to properly enable and disable formatters on start but also pause/resume. This solve a random shift on TDMIN as well repeated samples on for TDMOUT. Fixes: d60e4f1e4be5 ("ASoC: meson: add tdm interface driver") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-4-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/axg-tdm-interface.c | 34 ++++++++++++++++------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index 028383f949efd..272c3d2d68cb7 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -351,26 +351,31 @@ static int axg_tdm_iface_hw_params(struct snd_pcm_substream *substream, return 0; } -static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, +static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); + struct axg_tdm_stream *ts = + snd_soc_dai_get_dma_data(dai, substream); - /* Stop all attached formatters */ - axg_tdm_stream_stop(ts); + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + axg_tdm_stream_start(ts); + break; + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_STOP: + axg_tdm_stream_stop(ts); + break; + default: + return -EINVAL; + } return 0; } -static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); - - /* Force all attached formatters to update */ - return axg_tdm_stream_reset(ts); -} - static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) { if (dai->capture_dma_data) @@ -408,8 +413,7 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, - .prepare = axg_tdm_iface_prepare, - .hw_free = axg_tdm_iface_hw_free, + .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ -- GitLab From 21d458ecf443a3873d2f50ca38bb7ba30ad8be2e Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 15:41:47 +0200 Subject: [PATCH 2070/2290] ASoC: meson: cards: select SND_DYNAMIC_MINORS [ Upstream commit 6db26f9ea4edd8a17d39ab3c20111e3ccd704aef ] Amlogic sound cards do create a lot of pcm interfaces, possibly more than 8. Some pcm interfaces are internal (like DPCM backends and c2c) and not exposed to userspace. Those interfaces still increase the number passed to snd_find_free_minor(), which eventually exceeds 8 causing -EBUSY error on card registration if CONFIG_SND_DYNAMIC_MINORS=n and the interface is exposed to userspace. select CONFIG_SND_DYNAMIC_MINORS for Amlogic cards to avoid the problem. Fixes: 7864a79f37b5 ("ASoC: meson: add axg sound card support") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426134150.3053741-1-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/meson/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/meson/Kconfig b/sound/soc/meson/Kconfig index b93ea33739f29..6458d5dc4902f 100644 --- a/sound/soc/meson/Kconfig +++ b/sound/soc/meson/Kconfig @@ -99,6 +99,7 @@ config SND_MESON_AXG_PDM config SND_MESON_CARD_UTILS tristate + select SND_DYNAMIC_MINORS config SND_MESON_CODEC_GLUE tristate -- GitLab From 722d33c442e66e4aabd3e778958d696ff3a2777e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 26 Apr 2024 10:27:31 -0500 Subject: [PATCH 2071/2290] ALSA: hda: intel-sdw-acpi: fix usage of device_get_named_child_node() [ Upstream commit c158cf914713efc3bcdc25680c7156c48c12ef6a ] The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid a leaked reference. Signed-off-by: Pierre-Louis Bossart Fixes: 08c2a4bc9f2a ("ALSA: hda: move Intel SoundWire ACPI scan to dedicated module") Message-ID: <20240426152731.38420-1-pierre-louis.bossart@linux.intel.com> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/hda/intel-sdw-acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index b57d72ea4503f..4e376994bf78b 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -41,6 +41,8 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, u8 idx) "intel-quirk-mask", &quirk_mask); + fwnode_handle_put(link); + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) return false; -- GitLab From 10452edd175fcc4fd0f5ac782ed2a002e3e5d65c Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:22 +0700 Subject: [PATCH 2072/2290] s390/cio: Ensure the copied buf is NUL terminated [ Upstream commit da7c622cddd4fe36be69ca61e8c42e43cde94784 ] Currently, we allocate a lbuf-sized kernel buffer and copy lbuf from userspace to that buffer. Later, we use scanf on this buffer but we don't ensure that the string is terminated inside the buffer, this can lead to OOB read when using scanf. Fix this issue by using memdup_user_nul instead. Fixes: a4f17cc72671 ("s390/cio: add CRW inject functionality") Signed-off-by: Bui Quang Minh Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-5-f1f1b53a10f4@gmail.com Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- drivers/s390/cio/cio_inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/cio_inject.c b/drivers/s390/cio/cio_inject.c index 8613fa937237b..a2e771ebae8eb 100644 --- a/drivers/s390/cio/cio_inject.c +++ b/drivers/s390/cio/cio_inject.c @@ -95,7 +95,7 @@ static ssize_t crw_inject_write(struct file *file, const char __user *buf, return -EINVAL; } - buffer = vmemdup_user(buf, lbuf); + buffer = memdup_user_nul(buf, lbuf); if (IS_ERR(buffer)) return -ENOMEM; -- GitLab From 3636dcdafbd14ce4af21097d5c806faaddb0d36b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Apr 2024 11:11:47 +0200 Subject: [PATCH 2073/2290] cxgb4: Properly lock TX queue for the selftest. [ Upstream commit 9067eccdd7849dd120d5495dbd5a686fa6ed2c1a ] The selftest for the driver sends a dummy packet and checks if the packet will be received properly as it should be. The regular TX path and the selftest can use the same network queue so locking is required and was missing in the selftest path. This was addressed in the commit cited below. Unfortunately locking the TX queue requires BH to be disabled which is not the case in selftest path which is invoked in process context. Lockdep should be complaining about this. Use __netif_tx_lock_bh() for TX queue locking. Fixes: c650e04898072 ("cxgb4: Fix race between loopback and normal Tx path") Reported-by: "John B. Wyatt IV" Closes: https://lore.kernel.org/all/Zic0ot5aGgR-V4Ks@thinkpad2021/ Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20240429091147.YWAaal4v@linutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 46809e2d94ee0..4809d9eae6ca5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2684,12 +2684,12 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) lb->loopback = 1; q = &adap->sge.ethtxq[pi->first_qset]; - __netif_tx_lock(q->txq, smp_processor_id()); + __netif_tx_lock_bh(q->txq); reclaim_completed_tx(adap, &q->q, -1, true); credits = txq_avail(&q->q) - ndesc; if (unlikely(credits < 0)) { - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); return -ENOMEM; } @@ -2724,7 +2724,7 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) init_completion(&lb->completion); txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); /* wait for the pkt to return */ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); -- GitLab From 7e52c09c284704d3eeb5a8023af8b4c653308166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Mon, 29 Apr 2024 15:38:32 +0200 Subject: [PATCH 2074/2290] net: dsa: mv88e6xxx: Fix number of databases for 88E6141 / 88E6341 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b9a61c20179fda7bdfe2c1210aa72451991ab81a ] The Topaz family (88E6141 and 88E6341) only support 256 Forwarding Information Tables. Fixes: a75961d0ebfd ("net: dsa: mv88e6xxx: Add support for ethernet switch 88E6341") Fixes: 1558727a1c1b ("net: dsa: mv88e6xxx: Add support for ethernet switch 88E6141") Signed-off-by: Marek Behún Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20240429133832.9547-1-kabel@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ba906dfab055c..517c50d11fbce 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5677,7 +5677,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6141", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_ports = 6, .num_internal_phys = 5, @@ -6134,7 +6134,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6341", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_internal_phys = 5, .num_ports = 6, -- GitLab From e005d6754e3e440257006795b687c4ad8733b493 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 30 Apr 2024 19:27:05 +0100 Subject: [PATCH 2075/2290] spi: fix null pointer dereference within spi_sync [ Upstream commit 4756fa529b2f12b7cb8f21fe229b0f6f47190829 ] If spi_sync() is called with the non-empty queue and the same spi_message is then reused, the complete callback for the message remains set while the context is cleared, leading to a null pointer dereference when the callback is invoked from spi_finalize_current_message(). With function inlining disabled, the call stack might look like this: _raw_spin_lock_irqsave from complete_with_flags+0x18/0x58 complete_with_flags from spi_complete+0x8/0xc spi_complete from spi_finalize_current_message+0xec/0x184 spi_finalize_current_message from spi_transfer_one_message+0x2a8/0x474 spi_transfer_one_message from __spi_pump_transfer_message+0x104/0x230 __spi_pump_transfer_message from __spi_transfer_message_noqueue+0x30/0xc4 __spi_transfer_message_noqueue from __spi_sync+0x204/0x248 __spi_sync from spi_sync+0x24/0x3c spi_sync from mcp251xfd_regmap_crc_read+0x124/0x28c [mcp251xfd] mcp251xfd_regmap_crc_read [mcp251xfd] from _regmap_raw_read+0xf8/0x154 _regmap_raw_read from _regmap_bus_read+0x44/0x70 _regmap_bus_read from _regmap_read+0x60/0xd8 _regmap_read from regmap_read+0x3c/0x5c regmap_read from mcp251xfd_alloc_can_err_skb+0x1c/0x54 [mcp251xfd] mcp251xfd_alloc_can_err_skb [mcp251xfd] from mcp251xfd_irq+0x194/0xe70 [mcp251xfd] mcp251xfd_irq [mcp251xfd] from irq_thread_fn+0x1c/0x78 irq_thread_fn from irq_thread+0x118/0x1f4 irq_thread from kthread+0xd8/0xf4 kthread from ret_from_fork+0x14/0x28 Fix this by also setting message->complete to NULL when the transfer is complete. Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Signed-off-by: Mans Rullgard Link: https://lore.kernel.org/r/20240430182705.13019-1-mans@mansr.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4aa2e0928de9c..1018feff468c9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4217,6 +4217,7 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) wait_for_completion(&done); status = message->status; } + message->complete = NULL; message->context = NULL; return status; -- GitLab From cd37a5a08c4ee701b594d1ef48d996567baf0392 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 27 Apr 2024 20:24:18 +0200 Subject: [PATCH 2076/2290] net: bridge: fix multicast-to-unicast with fraglist GSO [ Upstream commit 59c878cbcdd80ed39315573b3511d0acfd3501b5 ] Calling skb_copy on a SKB_GSO_FRAGLIST skb is not valid, since it returns an invalid linearized skb. This code only needs to change the ethernet header, so pskb_copy is the right function to call here. Fixes: 6db6f0eae605 ("bridge: multicast to unicast") Signed-off-by: Felix Fietkau Acked-by: Paolo Abeni Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 4e3394a7d7d45..982e7a9ccc41c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -261,7 +261,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = skb_copy(skb, GFP_ATOMIC); + skb = pskb_copy(skb, GFP_ATOMIC); if (!skb) { DEV_STATS_INC(dev, tx_dropped); return; -- GitLab From 989bf6fd1e1d058e73a364dce1a0c53d33373f62 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 27 Apr 2024 20:24:19 +0200 Subject: [PATCH 2077/2290] net: core: reject skb_copy(_expand) for fraglist GSO skbs [ Upstream commit d091e579b864fa790dd6a0cd537a22c383126681 ] SKB_GSO_FRAGLIST skbs must not be linearized, otherwise they become invalid. Return NULL if such an skb is passed to skb_copy or skb_copy_expand, in order to prevent a crash on a potential later call to skb_gso_segment. Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/skbuff.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e38a4c7449f62..4d46788cd493a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1720,11 +1720,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { - int headerlen = skb_headroom(skb); - unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + struct sk_buff *n; + unsigned int size; + int headerlen; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + headerlen = skb_headroom(skb); + size = skb_end_offset(skb) + skb->data_len; + n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -2037,12 +2043,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask, skb_alloc_rx_flag(skb), - NUMA_NO_NODE); - int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; + struct sk_buff *n; + int oldheadroom; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + oldheadroom = skb_headroom(skb); + n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (!n) return NULL; -- GitLab From d03a82f4f8144befdc10518e732e2a60b34c870e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 30 Apr 2024 10:03:38 -0400 Subject: [PATCH 2078/2290] tipc: fix a possible memleak in tipc_buf_append [ Upstream commit 97bf6f81b29a8efaf5d0983251a7450e5794370d ] __skb_linearize() doesn't free the skb when it fails, so move '*buf = NULL' after __skb_linearize(), so that the skb can be freed on the err path. Fixes: b7df21cf1b79 ("tipc: skb_linearize the head skb when reassembling msgs") Reported-by: Paolo Abeni Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/90710748c29a1521efac4f75ea01b3b7e61414cf.1714485818.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c4ba..c52ab423082cd 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - *buf = NULL; if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; + *buf = NULL; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; -- GitLab From 51ad57c9b0ff7fc01ed82a2d394b0869b7ff93d7 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 30 Apr 2024 18:50:13 +0200 Subject: [PATCH 2079/2290] vxlan: Pull inner IP header in vxlan_rcv(). [ Upstream commit f7789419137b18e3847d0cc41afd788c3c00663d ] Ensure the inner IP header is part of skb's linear data before reading its ECN bits. Otherwise we might read garbage. One symptom is the system erroneously logging errors like "vxlan: non-ECT from xxx.xxx.xxx.xxx with TOS=xxxx". Similar bugs have been fixed in geneve, ip_tunnel and ip6_tunnel (see commit 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") for example). So let's reuse the same code structure for consistency. Maybe we'll can add a common helper in the future. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Reviewed-by: Nikolay Aleksandrov Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/1239c8db54efec341dd6455c77e0380f58923a3c.1714495737.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_core.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index fbd36dff9ec27..01ce289f4abf0 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1721,6 +1721,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) bool raw_proto = false; void *oiph; __be32 vni = 0; + int nh; /* Need UDP and VXLAN header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1809,9 +1810,25 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(vxlan->dev, rx_length_errors); + DEV_STATS_INC(vxlan->dev, rx_errors); + vxlan_vnifilter_count(vxlan, vni, vninode, + VXLAN_VNI_STATS_RX_ERRORS, 0); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { ++vxlan->dev->stats.rx_frame_errors; ++vxlan->dev->stats.rx_errors; -- GitLab From 10cb803aff3b11fe0bd5f274fc1c231a43e88df6 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 30 Apr 2024 11:10:04 +0200 Subject: [PATCH 2080/2290] s390/qeth: Fix kernel panic after setting hsuid [ Upstream commit 8a2e4d37afb8500b276e5ee903dee06f50ab0494 ] Symptom: When the hsuid attribute is set for the first time on an IQD Layer3 device while the corresponding network interface is already UP, the kernel will try to execute a napi function pointer that is NULL. Example: --------------------------------------------------------------------------- [ 2057.572696] illegal operation: 0001 ilc:1 [#1] SMP [ 2057.572702] Modules linked in: af_iucv qeth_l3 zfcp scsi_transport_fc sunrpc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nf_tables_set nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink ghash_s390 prng xts aes_s390 des_s390 de s_generic sha3_512_s390 sha3_256_s390 sha512_s390 vfio_ccw vfio_mdev mdev vfio_iommu_type1 eadm_sch vfio ext4 mbcache jbd2 qeth_l2 bridge stp llc dasd_eckd_mod qeth dasd_mod qdio ccwgroup pkey zcrypt [ 2057.572739] CPU: 6 PID: 60182 Comm: stress_client Kdump: loaded Not tainted 4.18.0-541.el8.s390x #1 [ 2057.572742] Hardware name: IBM 3931 A01 704 (LPAR) [ 2057.572744] Krnl PSW : 0704f00180000000 0000000000000002 (0x2) [ 2057.572748] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 RI:0 EA:3 [ 2057.572751] Krnl GPRS: 0000000000000004 0000000000000000 00000000a3b008d8 0000000000000000 [ 2057.572754] 00000000a3b008d8 cb923a29c779abc5 0000000000000000 00000000814cfd80 [ 2057.572756] 000000000000012c 0000000000000000 00000000a3b008d8 00000000a3b008d8 [ 2057.572758] 00000000bab6d500 00000000814cfd80 0000000091317e46 00000000814cfc68 [ 2057.572762] Krnl Code:#0000000000000000: 0000 illegal >0000000000000002: 0000 illegal 0000000000000004: 0000 illegal 0000000000000006: 0000 illegal 0000000000000008: 0000 illegal 000000000000000a: 0000 illegal 000000000000000c: 0000 illegal 000000000000000e: 0000 illegal [ 2057.572800] Call Trace: [ 2057.572801] ([<00000000ec639700>] 0xec639700) [ 2057.572803] [<00000000913183e2>] net_rx_action+0x2ba/0x398 [ 2057.572809] [<0000000091515f76>] __do_softirq+0x11e/0x3a0 [ 2057.572813] [<0000000090ce160c>] do_softirq_own_stack+0x3c/0x58 [ 2057.572817] ([<0000000090d2cbd6>] do_softirq.part.1+0x56/0x60) [ 2057.572822] [<0000000090d2cc60>] __local_bh_enable_ip+0x80/0x98 [ 2057.572825] [<0000000091314706>] __dev_queue_xmit+0x2be/0xd70 [ 2057.572827] [<000003ff803dd6d6>] afiucv_hs_send+0x24e/0x300 [af_iucv] [ 2057.572830] [<000003ff803dd88a>] iucv_send_ctrl+0x102/0x138 [af_iucv] [ 2057.572833] [<000003ff803de72a>] iucv_sock_connect+0x37a/0x468 [af_iucv] [ 2057.572835] [<00000000912e7e90>] __sys_connect+0xa0/0xd8 [ 2057.572839] [<00000000912e9580>] sys_socketcall+0x228/0x348 [ 2057.572841] [<0000000091514e1a>] system_call+0x2a6/0x2c8 [ 2057.572843] Last Breaking-Event-Address: [ 2057.572844] [<0000000091317e44>] __napi_poll+0x4c/0x1d8 [ 2057.572846] [ 2057.572847] Kernel panic - not syncing: Fatal exception in interrupt ------------------------------------------------------------------------------------------- Analysis: There is one napi structure per out_q: card->qdio.out_qs[i].napi The napi.poll functions are set during qeth_open(). Since commit 1cfef80d4c2b ("s390/qeth: Don't call dev_close/dev_open (DOWN/UP)") qeth_set_offline()/qeth_set_online() no longer call dev_close()/ dev_open(). So if qeth_free_qdio_queues() cleared card->qdio.out_qs[i].napi.poll while the network interface was UP and the card was offline, they are not set again. Reproduction: chzdev -e $devno layer2=0 ip link set dev $network_interface up echo 0 > /sys/bus/ccwgroup/devices/0.0.$devno/online echo foo > /sys/bus/ccwgroup/devices/0.0.$devno/hsuid echo 1 > /sys/bus/ccwgroup/devices/0.0.$devno/online -> Crash (can be enforced e.g. by af_iucv connect(), ip link down/up, ...) Note that a Completion Queue (CQ) is only enabled or disabled, when hsuid is set for the first time or when it is removed. Workarounds: - Set hsuid before setting the device online for the first time or - Use chzdev -d $devno; chzdev $devno hsuid=xxx; chzdev -e $devno; to set hsuid on an existing device. (this will remove and recreate the network interface) Fix: There is no need to free the output queues when a completion queue is added or removed. card->qdio.state now indicates whether the inbound buffer pool and the outbound queues are allocated. card->qdio.c_q indicates whether a CQ is allocated. Fixes: 1cfef80d4c2b ("s390/qeth: Don't call dev_close/dev_open (DOWN/UP)") Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240430091004.2265683-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 61 ++++++++++++++----------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 1e6340e2c2588..f99d1d325f3ea 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -364,30 +364,33 @@ out: return rc; } +static void qeth_free_cq(struct qeth_card *card) +{ + if (card->qdio.c_q) { + qeth_free_qdio_queue(card->qdio.c_q); + card->qdio.c_q = NULL; + } +} + static int qeth_alloc_cq(struct qeth_card *card) { if (card->options.cq == QETH_CQ_ENABLED) { QETH_CARD_TEXT(card, 2, "cqon"); - card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - return -ENOMEM; + card->qdio.c_q = qeth_alloc_qdio_queue(); + if (!card->qdio.c_q) { + dev_err(&card->gdev->dev, + "Failed to create completion queue\n"); + return -ENOMEM; + } } } else { QETH_CARD_TEXT(card, 2, "nocq"); - card->qdio.c_q = NULL; + qeth_free_cq(card); } return 0; } -static void qeth_free_cq(struct qeth_card *card) -{ - if (card->qdio.c_q) { - qeth_free_qdio_queue(card->qdio.c_q); - card->qdio.c_q = NULL; - } -} - static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, int delayed) { @@ -2628,6 +2631,10 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "allcqdbf"); + /* completion */ + if (qeth_alloc_cq(card)) + goto out_err; + if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED, QETH_QDIO_ALLOCATED) != QETH_QDIO_UNINITIALIZED) return 0; @@ -2663,10 +2670,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT; } - /* completion */ - if (qeth_alloc_cq(card)) - goto out_freeoutq; - return 0; out_freeoutq: @@ -2677,6 +2680,8 @@ out_freeoutq: qeth_free_buffer_pool(card); out_buffer_pool: atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED); + qeth_free_cq(card); +out_err: return -ENOMEM; } @@ -2684,11 +2689,12 @@ static void qeth_free_qdio_queues(struct qeth_card *card) { int i, j; + qeth_free_cq(card); + if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == QETH_QDIO_UNINITIALIZED) return; - qeth_free_cq(card); for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (card->qdio.in_q->bufs[j].rx_skb) { consume_skb(card->qdio.in_q->bufs[j].rx_skb); @@ -3740,24 +3746,11 @@ static void qeth_qdio_poll(struct ccw_device *cdev, unsigned long card_ptr) int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) { - int rc; - - if (card->options.cq == QETH_CQ_NOTAVAILABLE) { - rc = -1; - goto out; - } else { - if (card->options.cq == cq) { - rc = 0; - goto out; - } - - qeth_free_qdio_queues(card); - card->options.cq = cq; - rc = 0; - } -out: - return rc; + if (card->options.cq == QETH_CQ_NOTAVAILABLE) + return -1; + card->options.cq = cq; + return 0; } EXPORT_SYMBOL_GPL(qeth_configure_cq); -- GitLab From f51181ac911de9eaac79aaa421c7695d8d06200a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Apr 2024 17:26:18 +0300 Subject: [PATCH 2081/2290] drm/panel: ili9341: Respect deferred probe [ Upstream commit 740fc1e0509be3f7e2207e89125b06119ed62943 ] GPIO controller might not be available when driver is being probed. There are plenty of reasons why, one of which is deferred probe. Since GPIOs are optional, return any error code we got to the upper layer, including deferred probe. With that in mind, use dev_err_probe() in order to avoid spamming the logs. Fixes: 5a04227326b0 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Reviewed-by: Sui Jingfeng Link: https://lore.kernel.org/r/20240425142706.2440113-3-andriy.shevchenko@linux.intel.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240425142706.2440113-3-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 39dc40cf681f0..c46b5d820f5a0 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -717,11 +717,11 @@ static int ili9341_probe(struct spi_device *spi) reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset)) - dev_err(dev, "Failed to get gpio 'reset'\n"); + return dev_err_probe(dev, PTR_ERR(reset), "Failed to get gpio 'reset'\n"); dc = devm_gpiod_get_optional(dev, "dc", GPIOD_OUT_LOW); if (IS_ERR(dc)) - dev_err(dev, "Failed to get gpio 'dc'\n"); + return dev_err_probe(dev, PTR_ERR(dc), "Failed to get gpio 'dc'\n"); if (!strcmp(id->name, "sf-tc240t-9370-t")) return ili9341_dpi_probe(spi, dc, reset); -- GitLab From 3a1ea8a2656762eeda3ccad4a83719c30ecb9474 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Apr 2024 17:26:19 +0300 Subject: [PATCH 2082/2290] drm/panel: ili9341: Use predefined error codes [ Upstream commit da85f0aaa9f21999753b01d45c0343f885a8f905 ] In one case the -1 is returned which is quite confusing code for the wrong device ID, in another the ret is returning instead of plain 0 that also confusing as readed may ask the possible meaning of positive codes, which are never the case there. Convert both to use explicit predefined error codes to make it clear what's going on there. Fixes: 5a04227326b0 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Andy Shevchenko Reviewed-by: Neil Armstrong Reviewed-by: Sui Jingfeng Link: https://lore.kernel.org/r/20240425142706.2440113-4-andriy.shevchenko@linux.intel.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240425142706.2440113-4-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index c46b5d820f5a0..285e76818d84d 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -420,7 +420,7 @@ static int ili9341_dpi_prepare(struct drm_panel *panel) ili9341_dpi_init(ili); - return ret; + return 0; } static int ili9341_dpi_enable(struct drm_panel *panel) @@ -728,7 +728,7 @@ static int ili9341_probe(struct spi_device *spi) else if (!strcmp(id->name, "yx240qv29")) return ili9341_dbi_probe(spi, dc, reset); - return -1; + return -ENODEV; } static void ili9341_remove(struct spi_device *spi) -- GitLab From 463c15af491b205f26b50e709ad23e4d62616158 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 30 Apr 2024 16:35:55 +0200 Subject: [PATCH 2083/2290] net: gro: add flush check in udp_gro_receive_segment [ Upstream commit 5babae777c61aa8a8679d59d3cdc54165ad96d42 ] GRO-GSO path is supposed to be transparent and as such L3 flush checks are relevant to all UDP flows merging in GRO. This patch uses the same logic and code from tcp_gro_receive, terminating merge if flush is non zero. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Richard Gobert Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/udp_offload.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 84b7d6089f76c..794ea24292f62 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -463,6 +463,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *p; unsigned int ulen; int ret = 0; + int flush; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -496,13 +497,22 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return p; } + flush = NAPI_GRO_CB(p)->flush; + + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (ulen > ntohs(uh2->len)) { + if (ulen > ntohs(uh2->len) || flush) { pp = p; } else { if (NAPI_GRO_CB(skb)->is_flist) { -- GitLab From 70f64cb29014e4c4f1fabd3265feebd80590d069 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Fri, 13 Oct 2023 20:17:12 +0200 Subject: [PATCH 2084/2290] clk: sunxi-ng: h6: Reparent CPUX during PLL CPUX rate change [ Upstream commit 7e91ed763dc07437777bd012af7a2bd4493731ff ] While PLL CPUX clock rate change when CPU is running from it works in vast majority of cases, now and then it causes instability. This leads to system crashes and other undefined behaviour. After a lot of testing (30+ hours) while also doing a lot of frequency switches, we can't observe any instability issues anymore when doing reparenting to stable clock like 24 MHz oscillator. Fixes: 524353ea480b ("clk: sunxi-ng: add support for the Allwinner H6 CCU") Reported-by: Chad Wagner Link: https://forum.libreelec.tv/thread/27295-orange-pi-3-lts-freezes/ Tested-by: Chad Wagner Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20231013181712.2128037-1-jernej.skrabec@gmail.com Signed-off-by: Jernej Skrabec Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index 42568c6161814..892df807275c8 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c @@ -1181,11 +1181,18 @@ static const u32 usb2_clk_regs[] = { SUN50I_H6_USB3_CLK_REG, }; +static struct ccu_mux_nb sun50i_h6_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, + .bypass_index = 0, /* index of 24 MHz oscillator */ +}; + static int sun50i_h6_ccu_probe(struct platform_device *pdev) { void __iomem *reg; + int i, ret; u32 val; - int i; reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(reg)) @@ -1252,7 +1259,15 @@ static int sun50i_h6_ccu_probe(struct platform_device *pdev) val |= BIT(24); writel(val, reg + SUN50I_H6_HDMI_CEC_CLK_REG); - return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc); + ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc); + if (ret) + return ret; + + /* Reparent CPU during PLL CPUX rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_h6_cpu_nb); + + return 0; } static const struct of_device_id sun50i_h6_ccu_ids[] = { -- GitLab From 5aa59e14ec738f74b0a66c24a9772ccc27055015 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:38 -0500 Subject: [PATCH 2085/2290] powerpc/pseries: replace kmalloc with kzalloc in PLPKS driver [ Upstream commit 212dd5cfbee7815f3c665a51c501701edb881599 ] Replace kmalloc with kzalloc in construct_auth() function to default initialize structure with zeroes. Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-6-nayna@linux.ibm.com Stable-dep-of: 784354349d2c ("powerpc/pseries: make max polling consistent for longer H_CALLs") Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/plpks.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 63a1e1fe01851..bb6f5437d83ab 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -162,19 +162,15 @@ static struct plpks_auth *construct_auth(u8 consumer) if (consumer > PKS_OS_OWNER) return ERR_PTR(-EINVAL); - auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); + auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); if (!auth) return ERR_PTR(-ENOMEM); auth->version = 1; auth->consumer = consumer; - auth->rsvd0 = 0; - auth->rsvd1 = 0; - if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) { - auth->passwordlength = 0; + if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) return auth; - } memcpy(auth->password, ospassword, ospasswordlength); -- GitLab From 0b59ae6b5f599773b2e52960d2841b298d0530bc Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:50 +1100 Subject: [PATCH 2086/2290] powerpc/pseries: Move PLPKS constants to header file [ Upstream commit 3def7a3e7c2ce2ab5e5c54561da7125206851be4 ] Move the constants defined in plpks.c to plpks.h, and standardise their naming, so that PLPKS consumers can make use of them later on. Signed-off-by: Russell Currey Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-16-ajd@linux.ibm.com Stable-dep-of: 784354349d2c ("powerpc/pseries: make max polling consistent for longer H_CALLs") Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/plpks.c | 57 ++++++++++---------------- arch/powerpc/platforms/pseries/plpks.h | 36 +++++++++++++--- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index bb6f5437d83ab..06b52fe12c88b 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -21,19 +21,6 @@ #include "plpks.h" -#define PKS_FW_OWNER 0x1 -#define PKS_BOOTLOADER_OWNER 0x2 -#define PKS_OS_OWNER 0x3 - -#define LABEL_VERSION 0 -#define MAX_LABEL_ATTR_SIZE 16 -#define MAX_NAME_SIZE 239 -#define MAX_DATA_SIZE 4000 - -#define PKS_FLUSH_MAX_TIMEOUT 5000 //msec -#define PKS_FLUSH_SLEEP 10 //msec -#define PKS_FLUSH_SLEEP_RANGE 400 - static u8 *ospassword; static u16 ospasswordlength; @@ -60,7 +47,7 @@ struct label_attr { struct label { struct label_attr attr; - u8 name[MAX_NAME_SIZE]; + u8 name[PLPKS_MAX_NAME_SIZE]; size_t size; }; @@ -123,7 +110,7 @@ static int pseries_status_to_err(int rc) static int plpks_gen_password(void) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; - u8 *password, consumer = PKS_OS_OWNER; + u8 *password, consumer = PLPKS_OS_OWNER; int rc; password = kzalloc(maxpwsize, GFP_KERNEL); @@ -159,7 +146,7 @@ static struct plpks_auth *construct_auth(u8 consumer) { struct plpks_auth *auth; - if (consumer > PKS_OS_OWNER) + if (consumer > PLPKS_OS_OWNER) return ERR_PTR(-EINVAL); auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); @@ -169,7 +156,7 @@ static struct plpks_auth *construct_auth(u8 consumer) auth->version = 1; auth->consumer = consumer; - if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) + if (consumer == PLPKS_FW_OWNER || consumer == PLPKS_BOOTLOADER_OWNER) return auth; memcpy(auth->password, ospassword, ospasswordlength); @@ -189,7 +176,7 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, struct label *label; size_t slen; - if (!name || namelen > MAX_NAME_SIZE) + if (!name || namelen > PLPKS_MAX_NAME_SIZE) return ERR_PTR(-EINVAL); slen = strlen(component); @@ -203,9 +190,9 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, if (component) memcpy(&label->attr.prefix, component, slen); - label->attr.version = LABEL_VERSION; + label->attr.version = PLPKS_LABEL_VERSION; label->attr.os = varos; - label->attr.length = MAX_LABEL_ATTR_SIZE; + label->attr.length = PLPKS_MAX_LABEL_ATTR_SIZE; memcpy(&label->name, name, namelen); label->size = sizeof(struct label_attr) + namelen; @@ -267,10 +254,10 @@ static int plpks_confirm_object_flushed(struct label *label, if (!rc && status == 1) break; - usleep_range(PKS_FLUSH_SLEEP, - PKS_FLUSH_SLEEP + PKS_FLUSH_SLEEP_RANGE); - timeout = timeout + PKS_FLUSH_SLEEP; - } while (timeout < PKS_FLUSH_MAX_TIMEOUT); + usleep_range(PLPKS_FLUSH_SLEEP, + PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE); + timeout = timeout + PLPKS_FLUSH_SLEEP; + } while (timeout < PLPKS_MAX_TIMEOUT); rc = pseries_status_to_err(rc); @@ -285,13 +272,13 @@ int plpks_write_var(struct plpks_var var) int rc; if (!var.component || !var.data || var.datalen <= 0 || - var.namelen > MAX_NAME_SIZE || var.datalen > MAX_DATA_SIZE) + var.namelen > PLPKS_MAX_NAME_SIZE || var.datalen > PLPKS_MAX_DATA_SIZE) return -EINVAL; - if (var.policy & SIGNEDUPDATE) + if (var.policy & PLPKS_SIGNEDUPDATE) return -EINVAL; - auth = construct_auth(PKS_OS_OWNER); + auth = construct_auth(PLPKS_OS_OWNER); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -327,10 +314,10 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) struct label *label; int rc; - if (!component || vname.namelen > MAX_NAME_SIZE) + if (!component || vname.namelen > PLPKS_MAX_NAME_SIZE) return -EINVAL; - auth = construct_auth(PKS_OS_OWNER); + auth = construct_auth(PLPKS_OS_OWNER); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -366,14 +353,14 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) u8 *output; int rc; - if (var->namelen > MAX_NAME_SIZE) + if (var->namelen > PLPKS_MAX_NAME_SIZE) return -EINVAL; auth = construct_auth(consumer); if (IS_ERR(auth)) return PTR_ERR(auth); - if (consumer == PKS_OS_OWNER) { + if (consumer == PLPKS_OS_OWNER) { label = construct_label(var->component, var->os, var->name, var->namelen); if (IS_ERR(label)) { @@ -388,7 +375,7 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) goto out_free_label; } - if (consumer == PKS_OS_OWNER) + if (consumer == PLPKS_OS_OWNER) rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), virt_to_phys(label), label->size, virt_to_phys(output), maxobjsize); @@ -430,17 +417,17 @@ out_free_auth: int plpks_read_os_var(struct plpks_var *var) { - return plpks_read_var(PKS_OS_OWNER, var); + return plpks_read_var(PLPKS_OS_OWNER, var); } int plpks_read_fw_var(struct plpks_var *var) { - return plpks_read_var(PKS_FW_OWNER, var); + return plpks_read_var(PLPKS_FW_OWNER, var); } int plpks_read_bootloader_var(struct plpks_var *var) { - return plpks_read_var(PKS_BOOTLOADER_OWNER, var); + return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var); } static __init int pseries_plpks_init(void) diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h index 275ccd86bfb5e..6afb44ee74a16 100644 --- a/arch/powerpc/platforms/pseries/plpks.h +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -12,14 +12,40 @@ #include #include -#define OSSECBOOTAUDIT 0x40000000 -#define OSSECBOOTENFORCE 0x20000000 -#define WORLDREADABLE 0x08000000 -#define SIGNEDUPDATE 0x01000000 +// Object policy flags from supported_policies +#define PLPKS_OSSECBOOTAUDIT PPC_BIT32(1) // OS secure boot must be audit/enforce +#define PLPKS_OSSECBOOTENFORCE PPC_BIT32(2) // OS secure boot must be enforce +#define PLPKS_PWSET PPC_BIT32(3) // No access without password set +#define PLPKS_WORLDREADABLE PPC_BIT32(4) // Readable without authentication +#define PLPKS_IMMUTABLE PPC_BIT32(5) // Once written, object cannot be removed +#define PLPKS_TRANSIENT PPC_BIT32(6) // Object does not persist through reboot +#define PLPKS_SIGNEDUPDATE PPC_BIT32(7) // Object can only be modified by signed updates +#define PLPKS_HVPROVISIONED PPC_BIT32(28) // Hypervisor has provisioned this object -#define PLPKS_VAR_LINUX 0x02 +// Signature algorithm flags from signed_update_algorithms +#define PLPKS_ALG_RSA2048 PPC_BIT(0) +#define PLPKS_ALG_RSA4096 PPC_BIT(1) + +// Object label OS metadata flags +#define PLPKS_VAR_LINUX 0x02 #define PLPKS_VAR_COMMON 0x04 +// Flags for which consumer owns an object is owned by +#define PLPKS_FW_OWNER 0x1 +#define PLPKS_BOOTLOADER_OWNER 0x2 +#define PLPKS_OS_OWNER 0x3 + +// Flags for label metadata fields +#define PLPKS_LABEL_VERSION 0 +#define PLPKS_MAX_LABEL_ATTR_SIZE 16 +#define PLPKS_MAX_NAME_SIZE 239 +#define PLPKS_MAX_DATA_SIZE 4000 + +// Timeouts for PLPKS operations +#define PLPKS_MAX_TIMEOUT 5000 // msec +#define PLPKS_FLUSH_SLEEP 10 // msec +#define PLPKS_FLUSH_SLEEP_RANGE 400 + struct plpks_var { char *component; u8 *name; -- GitLab From 15e1f8425f1a7c2db18ed07959c38c8ef28e09d1 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 17 Apr 2024 23:12:30 -0400 Subject: [PATCH 2087/2290] powerpc/pseries: make max polling consistent for longer H_CALLs [ Upstream commit 784354349d2c988590c63a5a001ca37b2a6d4da1 ] Currently, plpks_confirm_object_flushed() function polls for 5msec in total instead of 5sec. Keep max polling time consistent for all the H_CALLs, which take longer than expected, to be 5sec. Also, make use of fsleep() everywhere to insert delay. Reported-by: Nageswara R Sastry Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Tested-by: Nageswara R Sastry Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240418031230.170954-1-nayna@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/plpks.c | 3 +-- arch/powerpc/platforms/pseries/plpks.h | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 06b52fe12c88b..25f95440a773b 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -254,8 +254,7 @@ static int plpks_confirm_object_flushed(struct label *label, if (!rc && status == 1) break; - usleep_range(PLPKS_FLUSH_SLEEP, - PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE); + fsleep(PLPKS_FLUSH_SLEEP); timeout = timeout + PLPKS_FLUSH_SLEEP; } while (timeout < PLPKS_MAX_TIMEOUT); diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h index 6afb44ee74a16..07278a990c2df 100644 --- a/arch/powerpc/platforms/pseries/plpks.h +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -42,9 +42,8 @@ #define PLPKS_MAX_DATA_SIZE 4000 // Timeouts for PLPKS operations -#define PLPKS_MAX_TIMEOUT 5000 // msec -#define PLPKS_FLUSH_SLEEP 10 // msec -#define PLPKS_FLUSH_SLEEP_RANGE 400 +#define PLPKS_MAX_TIMEOUT (5 * USEC_PER_SEC) +#define PLPKS_FLUSH_SLEEP 10000 // usec struct plpks_var { char *component; -- GitLab From 7fb5793c53f8c024e3eae9f0d44eb659aed833c4 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Mon, 22 Apr 2024 15:51:41 -0500 Subject: [PATCH 2088/2290] powerpc/pseries/iommu: LPAR panics during boot up with a frozen PE [ Upstream commit 49a940dbdc3107fecd5e6d3063dc07128177e058 ] At the time of LPAR boot up, partition firmware provides Open Firmware property ibm,dma-window for the PE. This property is provided on the PCI bus the PE is attached to. There are execptions where the partition firmware might not provide this property for the PE at the time of LPAR boot up. One of the scenario is where the firmware has frozen the PE due to some error condition. This PE is frozen for 24 hours or unless the whole system is reinitialized. Within this time frame, if the LPAR is booted, the frozen PE will be presented to the LPAR but ibm,dma-window property could be missing. Today, under these circumstances, the LPAR oopses with NULL pointer dereference, when configuring the PCI bus the PE is attached to. BUG: Kernel NULL pointer dereference on read at 0x000000c8 Faulting instruction address: 0xc0000000001024c0 Oops: Kernel access of bad area, sig: 7 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: Supported: Yes CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.4.0-150600.9-default #1 Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NM1060_023) hv:phyp pSeries NIP: c0000000001024c0 LR: c0000000001024b0 CTR: c000000000102450 REGS: c0000000037db5c0 TRAP: 0300 Not tainted (6.4.0-150600.9-default) MSR: 8000000002009033 CR: 28000822 XER: 00000000 CFAR: c00000000010254c DAR: 00000000000000c8 DSISR: 00080000 IRQMASK: 0 ... NIP [c0000000001024c0] pci_dma_bus_setup_pSeriesLP+0x70/0x2a0 LR [c0000000001024b0] pci_dma_bus_setup_pSeriesLP+0x60/0x2a0 Call Trace: pci_dma_bus_setup_pSeriesLP+0x60/0x2a0 (unreliable) pcibios_setup_bus_self+0x1c0/0x370 __of_scan_bus+0x2f8/0x330 pcibios_scan_phb+0x280/0x3d0 pcibios_init+0x88/0x12c do_one_initcall+0x60/0x320 kernel_init_freeable+0x344/0x3e4 kernel_init+0x34/0x1d0 ret_from_kernel_user_thread+0x14/0x1c Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window") Signed-off-by: Gaurav Batra Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422205141.10662-1-gbatra@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1e5f083cdb720..5e00a3cde93b9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -781,8 +781,16 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) * parent bus. During reboot, there will be ibm,dma-window property to * define DMA window. For kdump, there will at least be default window or DDW * or both. + * There is an exception to the above. In case the PE goes into frozen + * state, firmware may not provide ibm,dma-window property at the time + * of LPAR boot up. */ + if (!pdn) { + pr_debug(" no ibm,dma-window property !\n"); + return; + } + ppci = PCI_DN(pdn); pr_debug(" parent is %pOF, iommu_table: 0x%p\n", -- GitLab From a2184f533fadf382ed3fbf4cea3c4e0b9e64a51f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Sep 2023 10:09:04 +0100 Subject: [PATCH 2089/2290] KVM: arm64: vgic-v2: Use cpuid from userspace as vcpu_id [ Upstream commit 4e7728c81a54b17bd33be402ac140bc11bb0c4f4 ] When parsing a GICv2 attribute that contains a cpuid, handle this as the vcpu_id, not a vcpu_idx, as userspace cannot really know the mapping between the two. For this, use kvm_get_vcpu_by_id() instead of kvm_get_vcpu(). Take this opportunity to get rid of the pointless check against online_vcpus, which doesn't make much sense either, and switch to FIELD_GET as a way to extract the vcpu_id. Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230927090911.3355209-5-maz@kernel.org Signed-off-by: Oliver Upton Stable-dep-of: 6ddb4f372fc6 ("KVM: arm64: vgic-v2: Check for non-NULL vCPU in vgic_v2_parse_attr()") Signed-off-by: Sasha Levin --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index bf4b3d9631ce1..97ead28f81425 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -339,13 +339,9 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, { int cpuid; - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; + cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) - return -EINVAL; - - reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return 0; -- GitLab From 3a5b0378ac6776c7c31b18e0f3c1389bd6005e80 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Apr 2024 17:39:58 +0000 Subject: [PATCH 2090/2290] KVM: arm64: vgic-v2: Check for non-NULL vCPU in vgic_v2_parse_attr() [ Upstream commit 6ddb4f372fc63210034b903d96ebbeb3c7195adb ] vgic_v2_parse_attr() is responsible for finding the vCPU that matches the user-provided CPUID, which (of course) may not be valid. If the ID is invalid, kvm_get_vcpu_by_id() returns NULL, which isn't handled gracefully. Similar to the GICv3 uaccess flow, check that kvm_get_vcpu_by_id() actually returns something and fail the ioctl if not. Cc: stable@vger.kernel.org Fixes: 7d450e282171 ("KVM: arm/arm64: vgic-new: Add userland access to VGIC dist registers") Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Reviewed-by: Alexander Potapenko Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240424173959.3776798-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Sasha Levin --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 97ead28f81425..63731fb3d8f63 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -337,12 +337,12 @@ int kvm_register_vgic_device(unsigned long type) int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, struct vgic_reg_attr *reg_attr) { - int cpuid; + int cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - - reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); + if (!reg_attr->vcpu) + return -EINVAL; return 0; } -- GitLab From 0936809d968ecf81e0726fbd02ff2a5732d960c3 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 5 Mar 2024 12:04:53 -0800 Subject: [PATCH 2091/2290] scsi: lpfc: Move NPIV's transport unregistration to after resource clean up [ Upstream commit 4ddf01f2f1504fa08b766e8cfeec558e9f8eef6c ] There are cases after NPIV deletion where the fabric switch still believes the NPIV is logged into the fabric. This occurs when a vport is unregistered before the Remove All DA_ID CT and LOGO ELS are sent to the fabric. Currently fc_remove_host(), which calls dev_loss_tmo for all D_IDs including the fabric D_ID, removes the last ndlp reference and frees the ndlp rport object. This sometimes causes the race condition where the final DA_ID and LOGO are skipped from being sent to the fabric switch. Fix by moving the fc_remove_host() and scsi_remove_host() calls after DA_ID and LOGO are sent. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240305200503.57317-3-justintee8345@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_vport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 4d171f5c213f7..6b4259894584f 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -693,10 +693,6 @@ lpfc_vport_delete(struct fc_vport *fc_vport) lpfc_free_sysfs_attr(vport); lpfc_debugfs_terminate(vport); - /* Remove FC host to break driver binding. */ - fc_remove_host(shost); - scsi_remove_host(shost); - /* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. */ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) @@ -740,6 +736,10 @@ lpfc_vport_delete(struct fc_vport *fc_vport) skip_logo: + /* Remove FC host to break driver binding. */ + fc_remove_host(shost); + scsi_remove_host(shost); + lpfc_cleanup(vport); /* Remove scsi host now. The nodes are cleaned up. */ -- GitLab From 645b6a5e02d1f26436626e431ed760a10768b927 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 5 Mar 2024 12:04:55 -0800 Subject: [PATCH 2092/2290] scsi: lpfc: Update lpfc_ramp_down_queue_handler() logic [ Upstream commit bb011631435c705cdeddca68d5c85fd40a4320f9 ] Typically when an out of resource CQE status is detected, the lpfc_ramp_down_queue_handler() logic is called to help reduce I/O load by reducing an sdev's queue_depth. However, the current lpfc_rampdown_queue_depth() logic does not help reduce queue_depth. num_cmd_success is never updated and is always zero, which means new_queue_depth will always be set to sdev->queue_depth. So, new_queue_depth = sdev->queue_depth - new_queue_depth always sets new_queue_depth to zero. And, scsi_change_queue_depth(sdev, 0) is essentially a no-op. Change the lpfc_ramp_down_queue_handler() logic to set new_queue_depth equal to sdev->queue_depth subtracted from number of times num_rsrc_err was incremented. If num_rsrc_err is >= sdev->queue_depth, then set new_queue_depth equal to 1. Eventually, the frequency of Good_Status frames will signal SCSI upper layer to auto increase the queue_depth back to the driver default of 64 via scsi_handle_queue_ramp_up(). Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240305200503.57317-5-justintee8345@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc.h | 1 - drivers/scsi/lpfc/lpfc_scsi.c | 13 ++++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index dc5ac3cc70f6d..6f08fbe103cb9 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1355,7 +1355,6 @@ struct lpfc_hba { struct timer_list fabric_block_timer; unsigned long bit_flags; atomic_t num_rsrc_err; - atomic_t num_cmd_success; unsigned long last_rsrc_error_time; unsigned long last_ramp_down_time; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 0bb7e164b525f..2a81a42de5c14 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -167,11 +167,10 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) struct Scsi_Host *shost; struct scsi_device *sdev; unsigned long new_queue_depth; - unsigned long num_rsrc_err, num_cmd_success; + unsigned long num_rsrc_err; int i; num_rsrc_err = atomic_read(&phba->num_rsrc_err); - num_cmd_success = atomic_read(&phba->num_cmd_success); /* * The error and success command counters are global per @@ -186,20 +185,16 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { shost = lpfc_shost_from_vport(vports[i]); shost_for_each_device(sdev, shost) { - new_queue_depth = - sdev->queue_depth * num_rsrc_err / - (num_rsrc_err + num_cmd_success); - if (!new_queue_depth) - new_queue_depth = sdev->queue_depth - 1; + if (num_rsrc_err >= sdev->queue_depth) + new_queue_depth = 1; else new_queue_depth = sdev->queue_depth - - new_queue_depth; + num_rsrc_err; scsi_change_queue_depth(sdev, new_queue_depth); } } lpfc_destroy_vport_work_array(phba, vports); atomic_set(&phba->num_rsrc_err, 0); - atomic_set(&phba->num_cmd_success, 0); } /** -- GitLab From e25dca8db042cf85469694372748516da5c0c2ad Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 5 Mar 2024 12:04:56 -0800 Subject: [PATCH 2093/2290] scsi: lpfc: Replace hbalock with ndlp lock in lpfc_nvme_unregister_port() [ Upstream commit d11272be497e48a8e8f980470eb6b70e92eed0ce ] The ndlp object update in lpfc_nvme_unregister_port() should be protected by the ndlp lock rather than hbalock. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240305200503.57317-6-justintee8345@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 152245f7cacc7..7e9e0d969256a 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2621,9 +2621,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* No concern about the role change on the nvme remoteport. * The transport will update it. */ - spin_lock_irq(&vport->phba->hbalock); + spin_lock_irq(&ndlp->lock); ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT; - spin_unlock_irq(&vport->phba->hbalock); + spin_unlock_irq(&ndlp->lock); /* Don't let the host nvme transport keep sending keep-alives * on this remoteport. Vport is unloading, no recovery. The -- GitLab From 6503c39398506cadda9f4c81695a9655ca5fb4fd Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Tue, 5 Mar 2024 12:04:57 -0800 Subject: [PATCH 2094/2290] scsi: lpfc: Release hbalock before calling lpfc_worker_wake_up() [ Upstream commit ded20192dff31c91cef2a04f7e20e60e9bb887d3 ] lpfc_worker_wake_up() calls the lpfc_work_done() routine, which takes the hbalock. Thus, lpfc_worker_wake_up() should not be called while holding the hbalock to avoid potential deadlock. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240305200503.57317-7-justintee8345@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 20 ++++++++++---------- drivers/scsi/lpfc/lpfc_hbadisc.c | 5 ++--- drivers/scsi/lpfc/lpfc_sli.c | 14 +++++++------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 6b5ce9869e6b4..05764008f6e70 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4384,23 +4384,23 @@ lpfc_els_retry_delay(struct timer_list *t) unsigned long flags; struct lpfc_work_evt *evtp = &ndlp->els_retry_evt; + /* Hold a node reference for outstanding queued work */ + if (!lpfc_nlp_get(ndlp)) + return; + spin_lock_irqsave(&phba->hbalock, flags); if (!list_empty(&evtp->evt_listp)) { spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_nlp_put(ndlp); return; } - /* We need to hold the node by incrementing the reference - * count until the queued work is done - */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); - if (evtp->evt_arg1) { - evtp->evt = LPFC_EVT_ELS_RETRY; - list_add_tail(&evtp->evt_listp, &phba->work_list); - lpfc_worker_wake_up(phba); - } + evtp->evt_arg1 = ndlp; + evtp->evt = LPFC_EVT_ELS_RETRY; + list_add_tail(&evtp->evt_listp, &phba->work_list); spin_unlock_irqrestore(&phba->hbalock, flags); - return; + + lpfc_worker_wake_up(phba); } /** diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 549fa7d6c0f6f..aaa98a006fdcb 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -241,7 +241,9 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) if (evtp->evt_arg1) { evtp->evt = LPFC_EVT_DEV_LOSS; list_add_tail(&evtp->evt_listp, &phba->work_list); + spin_unlock_irqrestore(&phba->hbalock, iflags); lpfc_worker_wake_up(phba); + return; } spin_unlock_irqrestore(&phba->hbalock, iflags); } else { @@ -259,10 +261,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); } - } - - return; } /** diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 427a6ac803e50..47b8102a7063a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1217,9 +1217,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, empty = list_empty(&phba->active_rrq_list); list_add_tail(&rrq->list, &phba->active_rrq_list); phba->hba_flag |= HBA_RRQ_ACTIVE; + spin_unlock_irqrestore(&phba->hbalock, iflags); if (empty) lpfc_worker_wake_up(phba); - spin_unlock_irqrestore(&phba->hbalock, iflags); return 0; out: spin_unlock_irqrestore(&phba->hbalock, iflags); @@ -11361,18 +11361,18 @@ lpfc_sli_post_recovery_event(struct lpfc_hba *phba, unsigned long iflags; struct lpfc_work_evt *evtp = &ndlp->recovery_evt; + /* Hold a node reference for outstanding queued work */ + if (!lpfc_nlp_get(ndlp)) + return; + spin_lock_irqsave(&phba->hbalock, iflags); if (!list_empty(&evtp->evt_listp)) { spin_unlock_irqrestore(&phba->hbalock, iflags); + lpfc_nlp_put(ndlp); return; } - /* Incrementing the reference count until the queued work is done. */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); - if (!evtp->evt_arg1) { - spin_unlock_irqrestore(&phba->hbalock, iflags); - return; - } + evtp->evt_arg1 = ndlp; evtp->evt = LPFC_EVT_RECOVER_PORT; list_add_tail(&evtp->evt_listp, &phba->work_list); spin_unlock_irqrestore(&phba->hbalock, iflags); -- GitLab From 5f1d833429e380b8d77ac64583a09f82898f7105 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Mon, 11 Mar 2024 16:40:36 +0100 Subject: [PATCH 2095/2290] gfs2: Fix invalid metadata access in punch_hole [ Upstream commit c95346ac918c5badf51b9a7ac58a26d3bd5bb224 ] In punch_hole(), when the offset lies in the final block for a given height, there is no hole to punch, but the maximum size check fails to detect that. Consequently, punch_hole() will try to punch a hole beyond the end of the metadata and fail. Fix the maximum size check. Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/bmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e7537fd305dd2..9ad11e5bf14c3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1702,7 +1702,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; @@ -1713,7 +1714,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated meta-data; * there are no blocks do deallocate. -- GitLab From ec9727406eb4d27e7f8fbd4367792b7f4015a2fb Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 14 Mar 2024 14:23:00 -0700 Subject: [PATCH 2096/2290] wifi: mac80211: fix ieee80211_bss_*_flags kernel-doc [ Upstream commit 774f8841f55d7ac4044c79812691649da203584a ] Running kernel-doc on ieee80211_i.h flagged the following: net/mac80211/ieee80211_i.h:145: warning: expecting prototype for enum ieee80211_corrupt_data_flags. Prototype was for enum ieee80211_bss_corrupt_data_flags instead net/mac80211/ieee80211_i.h:162: warning: expecting prototype for enum ieee80211_valid_data_flags. Prototype was for enum ieee80211_bss_valid_data_flags instead Fix these warnings. Signed-off-by: Jeff Johnson Reviewed-by: Simon Horman Link: https://msgid.link/20240314-kdoc-ieee80211_i-v1-1-72b91b55b257@quicinc.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/ieee80211_i.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d5dd2d9e89b48..3e14d5c9aa1b4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -120,7 +120,7 @@ struct ieee80211_bss { }; /** - * enum ieee80211_corrupt_data_flags - BSS data corruption flags + * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags * @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted * @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted * @@ -133,7 +133,7 @@ enum ieee80211_bss_corrupt_data_flags { }; /** - * enum ieee80211_valid_data_flags - BSS valid data flags + * enum ieee80211_bss_valid_data_flags - BSS valid data flags * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE -- GitLab From d6275e1028badf7b7b50fb15755472f88c3a7c10 Mon Sep 17 00:00:00 2001 From: Igor Artemiev Date: Mon, 11 Mar 2024 19:45:19 +0300 Subject: [PATCH 2097/2290] wifi: cfg80211: fix rdev_dump_mpp() arguments order [ Upstream commit ec50f3114e55406a1aad24b7dfaa1c3f4336d8eb ] Fix the order of arguments in the TP_ARGS macro for the rdev_dump_mpp tracepoint event. Found by Linux Verification Center (linuxtesting.org). Signed-off-by: Igor Artemiev Link: https://msgid.link/20240311164519.118398-1-Igor.A.Artemiev@mcst.ru Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index a405c3edbc47e..cb5c3224e038a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1018,7 +1018,7 @@ TRACE_EVENT(rdev_get_mpp, TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), - TP_ARGS(wiphy, netdev, _idx, mpp, dst), + TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY -- GitLab From 2a1dc2e942fd8575dfa0e70c0074c308e1ea70ff Mon Sep 17 00:00:00 2001 From: linke li Date: Thu, 21 Mar 2024 16:44:10 +0800 Subject: [PATCH 2098/2290] net: mark racy access on sk->sk_rcvbuf [ Upstream commit c2deb2e971f5d9aca941ef13ee05566979e337a4 ] sk->sk_rcvbuf in __sock_queue_rcv_skb() and __sk_receive_skb() can be changed by other threads. Mark this as benign using READ_ONCE(). This patch is aimed at reducing the number of benign races reported by KCSAN in order to focus future debugging effort on harmful races. Signed-off-by: linke li Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 550af616f5359..48199e6e8f161 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -482,7 +482,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; @@ -552,7 +552,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, skb->dev = NULL; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } -- GitLab From 5f0266044dc611563539705bff0b3e1545fbb6aa Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 23 Mar 2024 17:41:55 +0900 Subject: [PATCH 2099/2290] scsi: mpi3mr: Avoid memcpy field-spanning write WARNING [ Upstream commit 429846b4b6ce9853e0d803a2357bb2e55083adf0 ] When the "storcli2 show" command is executed for eHBA-9600, mpi3mr driver prints this WARNING message: memcpy: detected field-spanning write (size 128) of single field "bsg_reply_buf->reply_buf" at drivers/scsi/mpi3mr/mpi3mr_app.c:1658 (size 1) WARNING: CPU: 0 PID: 12760 at drivers/scsi/mpi3mr/mpi3mr_app.c:1658 mpi3mr_bsg_request+0x6b12/0x7f10 [mpi3mr] The cause of the WARN is 128 bytes memcpy to the 1 byte size array "__u8 replay_buf[1]" in the struct mpi3mr_bsg_in_reply_buf. The array is intended to be a flexible length array, so the WARN is a false positive. To suppress the WARN, remove the constant number '1' from the array declaration and clarify that it has flexible length. Also, adjust the memory allocation size to match the change. Suggested-by: Sathya Prakash Veerichetty Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240323084155.166835-1-shinichiro.kawasaki@wdc.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpi3mr/mpi3mr_app.c | 2 +- include/uapi/scsi/scsi_bsg_mpi3mr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8c662d08706f1..42600e5c457a1 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -1344,7 +1344,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job, unsigned int *reply if ((mpirep_offset != 0xFF) && drv_bufs[mpirep_offset].bsg_buf_len) { drv_buf_iter = &drv_bufs[mpirep_offset]; - drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 + + drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) + mrioc->reply_sz); bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL); diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h index fdc3517f9e199..c48c5d08c0fa0 100644 --- a/include/uapi/scsi/scsi_bsg_mpi3mr.h +++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h @@ -382,7 +382,7 @@ struct mpi3mr_bsg_in_reply_buf { __u8 mpi_reply_type; __u8 rsvd1; __u16 rsvd2; - __u8 reply_buf[1]; + __u8 reply_buf[]; }; /** -- GitLab From 1150606d47d711d5bfdf329a1a96ed7027085936 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Fri, 15 Mar 2024 12:44:27 +0530 Subject: [PATCH 2100/2290] scsi: bnx2fc: Remove spin_lock_bh while releasing resources after upload [ Upstream commit c214ed2a4dda35b308b0b28eed804d7ae66401f9 ] The session resources are used by FW and driver when session is offloaded, once session is uploaded these resources are not used. The lock is not required as these fields won't be used any longer. The offload and upload calls are sequential, hence lock is not required. This will suppress following BUG_ON(): [ 449.843143] ------------[ cut here ]------------ [ 449.848302] kernel BUG at mm/vmalloc.c:2727! [ 449.853072] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 449.858712] CPU: 5 PID: 1996 Comm: kworker/u24:2 Not tainted 5.14.0-118.el9.x86_64 #1 Rebooting. [ 449.867454] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.3.4 11/08/2016 [ 449.876966] Workqueue: fc_rport_eq fc_rport_work [libfc] [ 449.882910] RIP: 0010:vunmap+0x2e/0x30 [ 449.887098] Code: 00 65 8b 05 14 a2 f0 4a a9 00 ff ff 00 75 1b 55 48 89 fd e8 34 36 79 00 48 85 ed 74 0b 48 89 ef 31 f6 5d e9 14 fc ff ff 5d c3 <0f> 0b 0f 1f 44 00 00 41 57 41 56 49 89 ce 41 55 49 89 fd 41 54 41 [ 449.908054] RSP: 0018:ffffb83d878b3d68 EFLAGS: 00010206 [ 449.913887] RAX: 0000000080000201 RBX: ffff8f4355133550 RCX: 000000000d400005 [ 449.921843] RDX: 0000000000000001 RSI: 0000000000001000 RDI: ffffb83da53f5000 [ 449.929808] RBP: ffff8f4ac6675800 R08: ffffb83d878b3d30 R09: 00000000000efbdf [ 449.937774] R10: 0000000000000003 R11: ffff8f434573e000 R12: 0000000000001000 [ 449.945736] R13: 0000000000001000 R14: ffffb83da53f5000 R15: ffff8f43d4ea3ae0 [ 449.953701] FS: 0000000000000000(0000) GS:ffff8f529fc80000(0000) knlGS:0000000000000000 [ 449.962732] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 449.969138] CR2: 00007f8cf993e150 CR3: 0000000efbe10003 CR4: 00000000003706e0 [ 449.977102] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 449.985065] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 449.993028] Call Trace: [ 449.995756] __iommu_dma_free+0x96/0x100 [ 450.000139] bnx2fc_free_session_resc+0x67/0x240 [bnx2fc] [ 450.006171] bnx2fc_upload_session+0xce/0x100 [bnx2fc] [ 450.011910] bnx2fc_rport_event_handler+0x9f/0x240 [bnx2fc] [ 450.018136] fc_rport_work+0x103/0x5b0 [libfc] [ 450.023103] process_one_work+0x1e8/0x3c0 [ 450.027581] worker_thread+0x50/0x3b0 [ 450.031669] ? rescuer_thread+0x370/0x370 [ 450.036143] kthread+0x149/0x170 [ 450.039744] ? set_kthread_struct+0x40/0x40 [ 450.044411] ret_from_fork+0x22/0x30 [ 450.048404] Modules linked in: vfat msdos fat xfs nfs_layout_nfsv41_files rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver dm_service_time qedf qed crc8 bnx2fc libfcoe libfc scsi_transport_fc intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp dcdbas rapl intel_cstate intel_uncore mei_me pcspkr mei ipmi_ssif lpc_ich ipmi_si fuse zram ext4 mbcache jbd2 loop nfsv3 nfs_acl nfs lockd grace fscache netfs irdma ice sd_mod t10_pi sg ib_uverbs ib_core 8021q garp mrp stp llc mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi fb_sys_fops cec crct10dif_pclmul ahci crc32_pclmul bnx2x drm ghash_clmulni_intel libahci rfkill i40e libata megaraid_sas mdio wmi sunrpc lrw dm_crypt dm_round_robin dm_multipath dm_snapshot dm_bufio dm_mirror dm_region_hash dm_log dm_zero dm_mod linear raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid6_pq libcrc32c crc32c_intel raid1 raid0 iscsi_ibft squashfs be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls [ 450.048497] libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi edd ipmi_devintf ipmi_msghandler [ 450.159753] ---[ end trace 712de2c57c64abc8 ]--- Reported-by: Guangwu Zhang Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240315071427.31842-1-skashyap@marvell.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/bnx2fc/bnx2fc_tgt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index 2c246e80c1c4d..d91659811eb3c 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -833,7 +833,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n"); - spin_lock_bh(&tgt->cq_lock); ctx_base_ptr = tgt->ctx_base; tgt->ctx_base = NULL; @@ -889,7 +888,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, tgt->sq, tgt->sq_dma); tgt->sq = NULL; } - spin_unlock_bh(&tgt->cq_lock); if (ctx_base_ptr) iounmap(ctx_base_ptr); -- GitLab From 681fb3c25d46267ec041659d4b0f56925067507f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 19 Mar 2024 08:28:18 +0530 Subject: [PATCH 2101/2290] btrfs: return accurate error code on open failure in open_fs_devices() [ Upstream commit 2f1aeab9fca1a5f583be1add175d1ee95c213cfa ] When attempting to exclusive open a device which has no exclusive open permission, such as a physical device associated with the flakey dm device, the open operation will fail, resulting in a mount failure. In this particular scenario, we erroneously return -EINVAL instead of the correct error code provided by the bdev_open_by_path() function, which is -EBUSY. Fix this, by returning error code from the bdev_open_by_path() function. With this correction, the mount error message will align with that of ext4 and xfs. Reviewed-by: Boris Burkov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ab5d410d560e7..a92069fbc0287 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1233,25 +1233,32 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; + int ret = 0; flags |= FMODE_EXCL; list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { - int ret; + int ret2; - ret = btrfs_open_one_device(fs_devices, device, flags, holder); - if (ret == 0 && + ret2 = btrfs_open_one_device(fs_devices, device, flags, holder); + if (ret2 == 0 && (!latest_dev || device->generation > latest_dev->generation)) { latest_dev = device; - } else if (ret == -ENODATA) { + } else if (ret2 == -ENODATA) { fs_devices->num_devices--; list_del(&device->dev_list); btrfs_free_device(device); } + if (ret == 0 && ret2 != 0) + ret = ret2; } - if (fs_devices->open_devices == 0) + + if (fs_devices->open_devices == 0) { + if (ret) + return ret; return -EINVAL; + } fs_devices->opened = 1; fs_devices->latest_dev = latest_dev; -- GitLab From fa6995eeb62e74b5a1480c73fb7b420c270784d3 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Tue, 26 Mar 2024 22:42:44 -0400 Subject: [PATCH 2102/2290] bpf: Check bloom filter map value size [ Upstream commit a8d89feba7e54e691ca7c4efc2a6264fa83f3687 ] This patch adds a missing check to bloom filter creating, rejecting values above KMALLOC_MAX_SIZE. This brings the bloom map in line with many other map types. The lack of this protection can cause kernel crashes for value sizes that overflow int's. Such a crash was caught by syzkaller. The next patch adds more guard-rails at a lower level. Signed-off-by: Andrei Matei Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240327024245.318299-2-andreimatei1@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/bloom_filter.c | 13 +++++++++++++ .../selftests/bpf/prog_tests/bloom_filter_map.c | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 48ee750849f25..78e810f49c445 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -88,6 +88,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key return -EOPNOTSUPP; } +/* Called from syscall */ +static int bloom_map_alloc_check(union bpf_attr *attr) +{ + if (attr->value_size > KMALLOC_MAX_SIZE) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + return -E2BIG; + + return 0; +} + static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; @@ -196,6 +208,7 @@ static int bloom_map_check_btf(const struct bpf_map *map, BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bloom_map_alloc_check, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, .map_get_next_key = bloom_map_get_next_key, diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index d2d9e965eba59..f79815b7e951b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include +#include #include #include "bloom_filter_map.skel.h" @@ -21,6 +22,11 @@ static void test_fail_cases(void) if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0")) close(fd); + /* Invalid value size: too big */ + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large")) + close(fd); + /* Invalid max entries size */ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL); if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size")) -- GitLab From 7ec2581823346d5ad432165aac444b2caca486fa Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 26 Mar 2024 21:25:48 +0100 Subject: [PATCH 2103/2290] kbuild: Disable KCSAN for autogenerated *.mod.c intermediaries [ Upstream commit 54babdc0343fff2f32dfaafaaa9e42c4db278204 ] When KCSAN and CONSTRUCTORS are enabled, one can trigger the "Unpatched return thunk in use. This should not happen!" catch-all warning. Usually, when objtool runs on the .o objects, it does generate a section .return_sites which contains all offsets in the objects to the return thunks of the functions present there. Those return thunks then get patched at runtime by the alternatives. KCSAN and CONSTRUCTORS add this to the object file's .text.startup section: ------------------- Disassembly of section .text.startup: ... 0000000000000010 <_sub_I_00099_0>: 10: f3 0f 1e fa endbr64 14: e8 00 00 00 00 call 19 <_sub_I_00099_0+0x9> 15: R_X86_64_PLT32 __tsan_init-0x4 19: e9 00 00 00 00 jmp 1e <__UNIQUE_ID___addressable_cryptd_alloc_aead349+0x6> 1a: R_X86_64_PLT32 __x86_return_thunk-0x4 ------------------- which, if it is built as a module goes through the intermediary stage of creating a .mod.c file which, when translated, receives a second constructor: ------------------- Disassembly of section .text.startup: 0000000000000010 <_sub_I_00099_0>: 10: f3 0f 1e fa endbr64 14: e8 00 00 00 00 call 19 <_sub_I_00099_0+0x9> 15: R_X86_64_PLT32 __tsan_init-0x4 19: e9 00 00 00 00 jmp 1e <_sub_I_00099_0+0xe> 1a: R_X86_64_PLT32 __x86_return_thunk-0x4 ... 0000000000000030 <_sub_I_00099_0>: 30: f3 0f 1e fa endbr64 34: e8 00 00 00 00 call 39 <_sub_I_00099_0+0x9> 35: R_X86_64_PLT32 __tsan_init-0x4 39: e9 00 00 00 00 jmp 3e <__ksymtab_cryptd_alloc_ahash+0x2> 3a: R_X86_64_PLT32 __x86_return_thunk-0x4 ------------------- in the .ko file. Objtool has run already so that second constructor's return thunk cannot be added to the .return_sites section and thus the return thunk remains unpatched and the warning rightfully fires. Drop KCSAN flags from the mod.c generation stage as those constructors do not contain data races one would be interested about. Debugged together with David Kaplan and Nikolay Borisov . Reported-by: Paul Menzel Closes: https://lore.kernel.org/r/0851a207-7143-417e-be31-8bf2b3afb57d@molgen.mpg.de Signed-off-by: Borislav Petkov (AMD) Tested-by: Paul Menzel # Dell XPS 13 Reviewed-by: Nikolay Borisov Reviewed-by: Marco Elver Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Makefile.modfinal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 3af5e5807983a..650d59388336f 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=)) part-of-module = y quiet_cmd_cc_o_c = CC [M] $@ - cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $< + cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $< %.mod.o: %.mod.c FORCE $(call if_changed_dep,cc_o_c) -- GitLab From c30a4ca93dfcbd1348c6953bf4894f4b7033f313 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Fri, 29 Mar 2024 09:50:36 +0800 Subject: [PATCH 2104/2290] scsi: ufs: core: WLUN suspend dev/link state error recovery [ Upstream commit 6bc5e70b1c792b31b497e48b4668a9a2909aca0d ] When wl suspend error occurs, for example BKOP or SSU timeout, the host triggers an error handler and returns -EBUSY to break the wl suspend process. However, it is possible for the runtime PM to enter wl suspend again before the error handler has finished, and return -EINVAL because the device is in an error state. To address this, ensure that the rumtime PM waits for the error handler to finish, or trigger the error handler in such cases, because returning -EINVAL can cause the I/O to hang. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20240329015036.15707-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f3c25467e571f..948449a13247c 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -9044,7 +9044,10 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) /* UFS device & link must be active before we enter in this function */ if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) { - ret = -EINVAL; + /* Wait err handler finish or trigger err recovery */ + if (!ufshcd_eh_in_progress(hba)) + ufshcd_force_error_recovery(hba); + ret = -EBUSY; goto enable_scaling; } -- GitLab From 07e72fe9432ba13e46942f211302a23bd294d6c7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 2 Apr 2024 08:36:25 +0200 Subject: [PATCH 2105/2290] ALSA: line6: Zero-initialize message buffers [ Upstream commit c4e51e424e2c772ce1836912a8b0b87cd61bc9d5 ] For shutting up spurious KMSAN uninit-value warnings, just replace kmalloc() calls with kzalloc() for the buffers used for communications. There should be no real issue with the original code, but it's still better to cover. Reported-by: syzbot+7fb05ccf7b3d2f9617b3@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000084b18706150bcca5@google.com Message-ID: <20240402063628.26609-1-tiwai@suse.de> Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/line6/driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b67617b68e509..f4437015d43a7 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -202,7 +202,7 @@ int line6_send_raw_message_async(struct usb_line6 *line6, const char *buffer, struct urb *urb; /* create message: */ - msg = kmalloc(sizeof(struct message), GFP_ATOMIC); + msg = kzalloc(sizeof(struct message), GFP_ATOMIC); if (msg == NULL) return -ENOMEM; @@ -688,7 +688,7 @@ static int line6_init_cap_control(struct usb_line6 *line6) int ret; /* initialize USB buffers: */ - line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL); + line6->buffer_listen = kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL); if (!line6->buffer_listen) return -ENOMEM; @@ -697,7 +697,7 @@ static int line6_init_cap_control(struct usb_line6 *line6) return -ENOMEM; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { - line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); + line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); if (!line6->buffer_message) return -ENOMEM; -- GitLab From 8a26198186e97ee5fc4b42fde82629cff8c75cd6 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 29 Mar 2024 09:23:19 +0800 Subject: [PATCH 2106/2290] block: fix overflow in blk_ioctl_discard() [ Upstream commit 22d24a544b0d49bbcbd61c8c0eaf77d3c9297155 ] There is no check for overflow of 'start + len' in blk_ioctl_discard(). Hung task occurs if submit an discard ioctl with the following param: start = 0x80000000000ff000, len = 0x8000000000fff000; Add the overflow validation now. Signed-off-by: Li Nan Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240329012319.2034550-1-linan666@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index 47567ba1185a6..99b8e2e448729 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -89,7 +89,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, unsigned long arg) { uint64_t range[2]; - uint64_t start, len; + uint64_t start, len, end; struct inode *inode = bdev->bd_inode; int err; @@ -110,7 +110,8 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, if (len & 511) return -EINVAL; - if (start + len > bdev_nr_bytes(bdev)) + if (check_add_overflow(start, len, &end) || + end > bdev_nr_bytes(bdev)) return -EINVAL; filemap_invalidate_lock(inode->i_mapping); -- GitLab From 9c08b9a943393ed095005e83b4ca753b097b2407 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Mon, 1 Apr 2024 13:09:33 +0200 Subject: [PATCH 2107/2290] net: bcmgenet: Reset RBUF on first open [ Upstream commit 0a6380cb4c6b5c1d6dad226ba3130f9090f0ccea ] If the RBUF logic is not reset when the kernel starts then there may be some data left over from any network boot loader. If the 64-byte packet headers are enabled then this can be fatal. Extend bcmgenet_dma_disable to do perform the reset, but not when called from bcmgenet_resume in order to preserve a wake packet. N.B. This different handling of resume is just based on a hunch - why else wouldn't one reset the RBUF as well as the TBUF? If this isn't the case then it's easy to change the patch to make the RBUF reset unconditional. See: https://github.com/raspberrypi/linux/issues/3850 See: https://github.com/raspberrypi/firmware/issues/1882 Signed-off-by: Phil Elwell Signed-off-by: Maarten Vanraes Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c2a9913082153..4b8574acf848e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3298,7 +3298,7 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv, } /* Returns a reusable dma control register value */ -static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) +static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx) { unsigned int i; u32 reg; @@ -3323,6 +3323,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) udelay(10); bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH); + if (flush_rx) { + reg = bcmgenet_rbuf_ctrl_get(priv); + bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0)); + udelay(10); + bcmgenet_rbuf_ctrl_set(priv, reg); + udelay(10); + } + return dma_ctrl; } @@ -3386,8 +3394,8 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_set_hw_addr(priv, dev->dev_addr); - /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + /* Disable RX/TX DMA and flush TX and RX queues */ + dma_ctrl = bcmgenet_dma_disable(priv, true); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); @@ -4258,7 +4266,7 @@ static int bcmgenet_resume(struct device *d) bcmgenet_hfb_create_rxnfc_filter(priv, rule); /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + dma_ctrl = bcmgenet_dma_disable(priv, false); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); -- GitLab From 337f84a0efe21dccef8b11a73cd19fb332af0ea2 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 3 Apr 2024 04:33:49 +0000 Subject: [PATCH 2108/2290] ata: sata_gemini: Check clk_enable() result [ Upstream commit e85006ae7430aef780cc4f0849692e266a102ec0 ] The call to clk_enable() in gemini_sata_start_bridge() can fail. Add a check to detect such failure. Signed-off-by: Chen Ni Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/sata_gemini.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index c96fcf9ee3c07..01f050b1bc93b 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -201,7 +201,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge) pclk = sg->sata0_pclk; else pclk = sg->sata1_pclk; - clk_enable(pclk); + ret = clk_enable(pclk); + if (ret) + return ret; + msleep(10); /* Do not keep clocking a bridge that is not online */ -- GitLab From 6fafe3661712b143d9c69a7322294bd53f559d5d Mon Sep 17 00:00:00 2001 From: Adam Goldman Date: Mon, 25 Mar 2024 07:38:41 +0900 Subject: [PATCH 2109/2290] firewire: ohci: mask bus reset interrupts between ISR and bottom half [ Upstream commit 752e3c53de0fa3b7d817a83050b6699b8e9c6ec9 ] In the FireWire OHCI interrupt handler, if a bus reset interrupt has occurred, mask bus reset interrupts until bus_reset_work has serviced and cleared the interrupt. Normally, we always leave bus reset interrupts masked. We infer the bus reset from the self-ID interrupt that happens shortly thereafter. A scenario where we unmask bus reset interrupts was introduced in 2008 in a007bb857e0b26f5d8b73c2ff90782d9c0972620: If OHCI_PARAM_DEBUG_BUSRESETS (8) is set in the debug parameter bitmask, we will unmask bus reset interrupts so we can log them. irq_handler logs the bus reset interrupt. However, we can't clear the bus reset event flag in irq_handler, because we won't service the event until later. irq_handler exits with the event flag still set. If the corresponding interrupt is still unmasked, the first bus reset will usually freeze the system due to irq_handler being called again each time it exits. This freeze can be reproduced by loading firewire_ohci with "modprobe firewire_ohci debug=-1" (to enable all debugging output). Apparently there are also some cases where bus_reset_work will get called soon enough to clear the event, and operation will continue normally. This freeze was first reported a few months after a007bb85 was committed, but until now it was never fixed. The debug level could safely be set to -1 through sysfs after the module was loaded, but this would be ineffectual in logging bus reset interrupts since they were only unmasked during initialization. irq_handler will now leave the event flag set but mask bus reset interrupts, so irq_handler won't be called again and there will be no freeze. If OHCI_PARAM_DEBUG_BUSRESETS is enabled, bus_reset_work will unmask the interrupt after servicing the event, so future interrupts will be caught as desired. As a side effect to this change, OHCI_PARAM_DEBUG_BUSRESETS can now be enabled through sysfs in addition to during initial module loading. However, when enabled through sysfs, logging of bus reset interrupts will be effective only starting with the second bus reset, after bus_reset_work has executed. Signed-off-by: Adam Goldman Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/ohci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 667ff40f39353..7d94e1cbc0ed3 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2049,6 +2049,8 @@ static void bus_reset_work(struct work_struct *work) ohci->generation = generation; reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset); + if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS) + reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset); if (ohci->quirks & QUIRK_RESET_PACKET) ohci->request_generation = generation; @@ -2115,12 +2117,14 @@ static irqreturn_t irq_handler(int irq, void *data) return IRQ_NONE; /* - * busReset and postedWriteErr must not be cleared yet + * busReset and postedWriteErr events must not be cleared yet * (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1) */ reg_write(ohci, OHCI1394_IntEventClear, event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr)); log_irqs(ohci, event); + if (event & OHCI1394_busReset) + reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset); if (event & OHCI1394_selfIDComplete) queue_work(selfid_workqueue, &ohci->bus_reset_work); -- GitLab From 768b167281d34471fcdc1ef85920e880d848fd5e Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Mon, 3 Apr 2023 14:11:38 -0700 Subject: [PATCH 2110/2290] tools/power turbostat: Fix added raw MSR output [ Upstream commit e5f4e68eed85fa8495d78cd966eecc2b27bb9e53 ] When using --Summary mode, added MSRs in raw mode always print zeros. Print the actual register contents. Example, with patch: note the added column: --add msr0x64f,u32,package,raw,REASON Where: 0x64F is MSR_CORE_PERF_LIMIT_REASONS Busy% Bzy_MHz PkgTmp PkgWatt CorWatt REASON 0.00 4800 35 1.42 0.76 0x00000000 0.00 4801 34 1.42 0.76 0x00000000 80.08 4531 66 108.17 107.52 0x08000000 98.69 4530 66 133.21 132.54 0x08000000 99.28 4505 66 128.26 127.60 0x0c000400 99.65 4486 68 124.91 124.25 0x0c000400 99.63 4483 68 124.90 124.25 0x0c000400 79.34 4481 41 99.80 99.13 0x0c000000 0.00 4801 41 1.40 0.73 0x0c000000 Where, for the test processor (i5-10600K): PKG Limit #1: 125.000 Watts, 8.000000 sec MSR bit 26 = log; bit 10 = status PKG Limit #2: 136.000 Watts, 0.002441 sec MSR bit 27 = log; bit 11 = status Example, without patch: Busy% Bzy_MHz PkgTmp PkgWatt CorWatt REASON 0.01 4800 35 1.43 0.77 0x00000000 0.00 4801 35 1.39 0.73 0x00000000 83.49 4531 66 112.71 112.06 0x00000000 98.69 4530 68 133.35 132.69 0x00000000 99.31 4500 67 127.96 127.30 0x00000000 99.63 4483 69 124.91 124.25 0x00000000 99.61 4481 69 124.90 124.25 0x00000000 99.61 4481 71 124.92 124.25 0x00000000 59.35 4479 42 75.03 74.37 0x00000000 0.00 4800 42 1.39 0.73 0x00000000 0.00 4801 42 1.42 0.76 0x00000000 c000000 [lenb: simplified patch to apply only to package scope] Signed-off-by: Doug Smythies Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b113900d94879..9018e47e0bc26 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1811,9 +1811,10 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) - continue; - average.packages.counter[i] += p->counter[i]; + if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.counter[i] = p->counter[i]; + else + average.packages.counter[i] += p->counter[i]; } return 0; } -- GitLab From fcdeb34d14af02b06e2367554d3f3d5b157b11c6 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 3 Oct 2023 05:07:51 +0000 Subject: [PATCH 2111/2290] tools/power turbostat: Increase the limit for fd opened [ Upstream commit 3ac1d14d0583a2de75d49a5234d767e2590384dd ] When running turbostat, a system with 512 cpus reaches the limit for maximum number of file descriptors that can be opened. To solve this problem, the limit is raised to 2^15, which is a large enough number. Below data is collected from AMD server systems while running turbostat: |-----------+-------------------------------| | # of cpus | # of opened fds for turbostat | |-----------+-------------------------------| | 128 | 260 | |-----------+-------------------------------| | 192 | 388 | |-----------+-------------------------------| | 512 | 1028 | |-----------+-------------------------------| So, the new max limit would be sufficient up to 2^14 cpus (but this also depends on how many counters are enabled). Reviewed-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Wyes Karny Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9018e47e0bc26..a674500e7e63d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -53,6 +53,8 @@ #define NAME_BYTES 20 #define PATH_BYTES 128 +#define MAX_NOFILE 0x8000 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; @@ -6719,6 +6721,22 @@ void cmdline(int argc, char **argv) } } +void set_rlimit(void) +{ + struct rlimit limit; + + if (getrlimit(RLIMIT_NOFILE, &limit) < 0) + err(1, "Failed to get rlimit"); + + if (limit.rlim_max < MAX_NOFILE) + limit.rlim_max = MAX_NOFILE; + if (limit.rlim_cur < MAX_NOFILE) + limit.rlim_cur = MAX_NOFILE; + + if (setrlimit(RLIMIT_NOFILE, &limit) < 0) + err(1, "Failed to set rlimit"); +} + int main(int argc, char **argv) { outf = stderr; @@ -6731,6 +6749,9 @@ int main(int argc, char **argv) probe_sysfs(); + if (!getuid()) + set_rlimit(); + turbostat_init(); msr_sum_record(); -- GitLab From bc40c15851828f8a6aec62cd34b75c009929f14e Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sat, 7 Oct 2023 13:46:22 +0800 Subject: [PATCH 2112/2290] tools/power turbostat: Fix Bzy_MHz documentation typo [ Upstream commit 0b13410b52c4636aacb6964a4253a797c0fa0d16 ] The code calculates Bzy_MHz by multiplying TSC_delta * APERF_delta/MPERF_delta The man page erroneously showed that TSC_delta was divided. Signed-off-by: Peng Liu Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 3e1a4c4be001a..7112d4732d287 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -370,7 +370,7 @@ below the processor's base frequency. Busy% = MPERF_delta/TSC_delta -Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval +Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval Note that these calculations depend on TSC_delta, so they are not reliable during intervals when TSC_MHz is not running at the base frequency. -- GitLab From 66619d8ad398c3a8b29c153647836c39b4a30b64 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 26 Mar 2024 11:55:22 -0700 Subject: [PATCH 2113/2290] btrfs: make btrfs_clear_delalloc_extent() free delalloc reserve [ Upstream commit 3c6f0c5ecc8910d4ffb0dfe85609ebc0c91c8f34 ] Currently, this call site in btrfs_clear_delalloc_extent() only converts the reservation. We are marking it not delalloc, so I don't think it makes sense to keep the rsv around. This is a path where we are not sure to join a transaction, so it leads to incorrect free-ing during umount. Helps with the pass rate of generic/269 and generic/475. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f7f4bcc094642..10ded9c2be03b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2472,7 +2472,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, */ if (bits & EXTENT_CLEAR_META_RESV && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len, false); + btrfs_delalloc_release_metadata(inode, len, true); /* For sanity tests. */ if (btrfs_is_testing(fs_info)) -- GitLab From e04539f513e72f892ea0590d00668e314cb78537 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 26 Mar 2024 12:01:28 -0700 Subject: [PATCH 2114/2290] btrfs: always clear PERTRANS metadata during commit [ Upstream commit 6e68de0bb0ed59e0554a0c15ede7308c47351e2d ] It is possible to clear a root's IN_TRANS tag from the radix tree, but not clear its PERTRANS, if there is some error in between. Eliminate that possibility by moving the free up to where we clear the tag. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5549c843f0d3f..a7853a3a57190 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1440,6 +1440,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + btrfs_qgroup_free_meta_all_pertrans(root); spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); @@ -1464,7 +1465,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) if (ret2) return ret2; spin_lock(&fs_info->fs_roots_radix_lock); - btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); -- GitLab From 701248485be3e109ce88845b0833df6f70840ea9 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 2 Apr 2024 13:26:59 +0000 Subject: [PATCH 2115/2290] memblock tests: fix undefined reference to `early_pfn_to_nid' [ Upstream commit 7d8ed162e6a92268d4b2b84d364a931216102c8e ] commit 6a9531c3a880 ("memblock: fix crash when reserved memory is not added to memory") introduce the usage of early_pfn_to_nid, which is not defined in memblock tests. The original definition of early_pfn_to_nid is defined in mm.h, so let add this in the corresponding mm.h. Signed-off-by: Wei Yang CC: Yajun Deng CC: Mike Rapoport Link: https://lore.kernel.org/r/20240402132701.29744-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Sasha Levin --- tools/include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index 43be27bcc897d..2f401e8c6c0bb 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count) { } +static inline int early_pfn_to_nid(unsigned long pfn) +{ + return 0; +} + #endif -- GitLab From 223550f0e91011cd18dfb0834b448a306b9b6e79 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 2 Apr 2024 13:27:00 +0000 Subject: [PATCH 2116/2290] memblock tests: fix undefined reference to `panic' [ Upstream commit e0f5a8e74be88f2476e58b25d3b49a9521bdc4ec ] commit e96c6b8f212a ("memblock: report failures when memblock_can_resize is not set") introduced the usage of panic, which is not defined in memblock test. Let's define it directly in panic.h to fix it. Signed-off-by: Wei Yang CC: Song Shuai CC: Mike Rapoport Link: https://lore.kernel.org/r/20240402132701.29744-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Sasha Levin --- tools/include/linux/kernel.h | 1 + tools/include/linux/panic.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tools/include/linux/panic.h diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 4b0673bf52c2e..07cfad817d539 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h new file mode 100644 index 0000000000000..9c8f17a41ce8e --- /dev/null +++ b/tools/include/linux/panic.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_PANIC_H +#define _TOOLS_LINUX_PANIC_H + +#include +#include +#include + +static inline void panic(const char *fmt, ...) +{ + va_list argp; + + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); + exit(-1); +} + +#endif -- GitLab From d38ca15be1154c30a60f8ab7a83b7f0c6a4c1208 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 2 Apr 2024 13:27:01 +0000 Subject: [PATCH 2117/2290] memblock tests: fix undefined reference to `BIT' [ Upstream commit 592447f6cb3c20d606d6c5d8e6af68e99707b786 ] commit 772dd0342727 ("mm: enumerate all gfp flags") define gfp flags with the help of BIT, while gfp_types.h doesn't include header file for the definition. This through an error on building memblock tests. Let's include linux/bits.h to fix it. Signed-off-by: Wei Yang CC: Suren Baghdasaryan CC: Michal Hocko Link: https://lore.kernel.org/r/20240402132701.29744-4-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Sasha Levin --- include/linux/gfp_types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index d88c46ca82e17..6811ab702e8dc 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -2,6 +2,8 @@ #ifndef __LINUX_GFP_TYPES_H #define __LINUX_GFP_TYPES_H +#include + /* The typedef is in types.h but we want the documentation here */ #if 0 /** -- GitLab From 62b8582d93cb7c9edf791f6436e1f5e9f46dcd14 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 15 Feb 2024 15:39:43 +0100 Subject: [PATCH 2118/2290] scsi: target: Fix SELinux error when systemd-modules loads the target module [ Upstream commit 97a54ef596c3fd24ec2b227ba8aaf2cf5415e779 ] If the systemd-modules service loads the target module, the credentials of that userspace process will be used to validate the access to the target db directory. SELinux will prevent it, reporting an error like the following: kernel: audit: type=1400 audit(1676301082.205:4): avc: denied { read } for pid=1020 comm="systemd-modules" name="target" dev="dm-3" ino=4657583 scontext=system_u:system_r:systemd_modules_load_t:s0 tcontext=system_u:object_r:targetd_etc_rw_t:s0 tclass=dir permissive=0 Fix the error by using the kernel credentials to access the db directory Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20240215143944.847184-2-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_configfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 416514c5c7acd..1a26dd0d56662 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3587,6 +3587,8 @@ static int __init target_core_init_configfs(void) { struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; + struct cred *kern_cred; + const struct cred *old_cred; int ret; pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage" @@ -3663,11 +3665,21 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; + /* We use the kernel credentials to access the target directory */ + kern_cred = prepare_kernel_cred(&init_task); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); target_init_dbroot(); + revert_creds(old_cred); + put_cred(kern_cred); return 0; out: + target_xcopy_release_pt(); configfs_unregister_subsystem(subsys); core_dev_release_virtual_lun0(); rd_module_exit(); -- GitLab From f6add0a6f78dc6360b822ca4b6f9f2f14174c8ca Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 4 Apr 2024 12:32:53 -0400 Subject: [PATCH 2119/2290] blk-iocost: avoid out of bounds shift [ Upstream commit beaa51b36012fad5a4d3c18b88a617aea7a9b96d ] UBSAN catches undefined behavior in blk-iocost, where sometimes iocg->delay is shifted right by a number that is too large, resulting in undefined behavior on some architectures. [ 186.556576] ------------[ cut here ]------------ UBSAN: shift-out-of-bounds in block/blk-iocost.c:1366:23 shift exponent 64 is too large for 64-bit type 'u64' (aka 'unsigned long long') CPU: 16 PID: 0 Comm: swapper/16 Tainted: G S E N 6.9.0-0_fbk700_debug_rc2_kbuilder_0_gc85af715cac0 #1 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A23 12/08/2020 Call Trace: dump_stack_lvl+0x8f/0xe0 __ubsan_handle_shift_out_of_bounds+0x22c/0x280 iocg_kick_delay+0x30b/0x310 ioc_timer_fn+0x2fb/0x1f80 __run_timer_base+0x1b6/0x250 ... Avoid that undefined behavior by simply taking the "delay = 0" branch if the shift is too large. I am not sure what the symptoms of an undefined value delay will be, but I suspect it could be more than a little annoying to debug. Signed-off-by: Rik van Riel Cc: Tejun Heo Cc: Josef Bacik Cc: Jens Axboe Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20240404123253.0f58010f@imladris.surriel.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-iocost.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index e6557024e3da8..64b594d660b79 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1331,7 +1331,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); - u64 tdelta, delay, new_delay; + u64 tdelta, delay, new_delay, shift; s64 vover, vover_pct; u32 hwa; @@ -1346,8 +1346,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; - if (iocg->delay) - delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC); + shift = div64_u64(tdelta, USEC_PER_SEC); + if (iocg->delay && shift < BITS_PER_LONG) + delay = iocg->delay >> shift; else delay = 0; -- GitLab From df541b658de47803a0d0c8f1c5e350d1cf3b4f69 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 14 Mar 2024 16:49:43 +0100 Subject: [PATCH 2120/2290] gpu: host1x: Do not setup DMA for virtual devices [ Upstream commit 8ab58f6841b19423231c5db3378691ec80c778f8 ] The host1x devices are virtual compound devices and do not perform DMA accesses themselves, so they do not need to be set up for DMA. Ideally we would also not need to set up DMA masks for the virtual devices, but we currently still need those for legacy support on old hardware. Tested-by: Jon Hunter Acked-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240314154943.2487549-1-thierry.reding@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/host1x/bus.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index bdee16a0bb8e2..ba622fb5e4822 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -368,11 +368,6 @@ static int host1x_device_uevent(struct device *dev, return 0; } -static int host1x_dma_configure(struct device *dev) -{ - return of_dma_configure(dev, dev->of_node, true); -} - static const struct dev_pm_ops host1x_device_pm_ops = { .suspend = pm_generic_suspend, .resume = pm_generic_resume, @@ -386,7 +381,6 @@ struct bus_type host1x_bus_type = { .name = "host1x", .match = host1x_device_match, .uevent = host1x_device_uevent, - .dma_configure = host1x_dma_configure, .pm = &host1x_device_pm_ops, }; @@ -475,8 +469,6 @@ static int host1x_device_add(struct host1x *host1x, device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; - of_dma_configure(&device->dev, host1x->dev->of_node, true); - device->dev.dma_parms = &device->dma_parms; dma_set_max_seg_size(&device->dev, UINT_MAX); -- GitLab From b002a1b321c646d849f0356a08a033acc95b907c Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 28 Mar 2024 14:27:56 +0000 Subject: [PATCH 2121/2290] MIPS: scall: Save thread_info.syscall unconditionally on entry [ Upstream commit 4370b673ccf240bf7587b0cb8e6726a5ccaf1f17 ] thread_info.syscall is used by syscall_get_nr to supply syscall nr over a thread stack frame. Previously, thread_info.syscall is only saved at syscall_trace_enter when syscall tracing is enabled. However rest of the kernel code do expect syscall_get_nr to be available without syscall tracing. The previous design breaks collect_syscall. Move saving process to syscall entry to fix it. Reported-by: Xi Ruoyao Link: https://github.com/util-linux/util-linux/issues/2867 Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/ptrace.h | 2 +- arch/mips/kernel/asm-offsets.c | 1 + arch/mips/kernel/ptrace.c | 15 ++++++--------- arch/mips/kernel/scall32-o32.S | 23 +++++++++++++---------- arch/mips/kernel/scall64-n32.S | 3 ++- arch/mips/kernel/scall64-n64.S | 3 ++- arch/mips/kernel/scall64-o32.S | 33 +++++++++++++++++---------------- 7 files changed, 42 insertions(+), 38 deletions(-) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index b3e4dd6be7e20..428b9f1cf1de2 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -157,7 +157,7 @@ static inline long regs_return_value(struct pt_regs *regs) #define instruction_pointer(regs) ((regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) -extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); +extern asmlinkage long syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); extern void die(const char *, struct pt_regs *) __noreturn; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index c4501897b870b..08342b9eccdbd 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -98,6 +98,7 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, preempt_count); OFFSET(TI_REGS, thread_info, regs); + OFFSET(TI_SYSCALL, thread_info, syscall); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 567aec4abac0f..a8e569830ec8d 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1309,16 +1309,13 @@ long arch_ptrace(struct task_struct *child, long request, * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ -asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) +asmlinkage long syscall_trace_enter(struct pt_regs *regs) { user_exit(); - current_thread_info()->syscall = syscall; - if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (ptrace_report_syscall_entry(regs)) return -1; - syscall = current_thread_info()->syscall; } #ifdef CONFIG_SECCOMP @@ -1327,7 +1324,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) struct seccomp_data sd; unsigned long args[6]; - sd.nr = syscall; + sd.nr = current_thread_info()->syscall; sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) @@ -1337,23 +1334,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) ret = __secure_computing(&sd); if (ret == -1) return ret; - syscall = current_thread_info()->syscall; } #endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[2]); - audit_syscall_entry(syscall, regs->regs[4], regs->regs[5], + audit_syscall_entry(current_thread_info()->syscall, + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); /* * Negative syscall numbers are mistaken for rejected syscalls, but * won't have had the return value set appropriately, so we do so now. */ - if (syscall < 0) + if (current_thread_info()->syscall < 0) syscall_set_return_value(current, regs, -ENOSYS, 0); - return syscall; + return current_thread_info()->syscall; } /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 18dc9b3450561..2c604717e6308 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -77,6 +77,18 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + */ + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + lw t0, TI_FLAGS($28) # syscall tracing enabled? li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 @@ -114,16 +126,7 @@ syscall_trace_entry: SAVE_STATIC move a0, sp - /* - * syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - */ - move a1, v0 - subu t2, v0, __NR_O32_Linux - bnez t2, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 97456b2ca7dc3..97788859238c3 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -72,7 +74,6 @@ syscall_common: n32_syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index e6264aa62e457..be11ea5cc67e0 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -82,7 +84,6 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d3c2616cba226..7a5abb73e5312 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -79,6 +79,22 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * absolute syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + * note: NR_syscall is the first O32 syscall but the macro is + * only defined when compiling with -mabi=32 (CONFIG_32BIT) + * therefore __NR_O32_Linux is used (4000) + */ + + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -113,22 +129,7 @@ trace_a_syscall: sd a7, PT_R11(sp) # For indirect syscalls move a0, sp - /* - * absolute syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - * note: NR_syscall is the first O32 syscall but the macro is - * only defined when compiling with -mabi=32 (CONFIG_32BIT) - * therefore __NR_O32_Linux is used (4000) - */ - .set push - .set reorder - subu t1, v0, __NR_O32_Linux - move a1, v0 - bnez t1, 1f /* __NR_syscall at offset 0 */ - ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ - .set pop - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall -- GitLab From a513ccd91549d333ea344a456d53fed33475df6a Mon Sep 17 00:00:00 2001 From: Justin Ernst Date: Tue, 2 Apr 2024 13:40:29 -0400 Subject: [PATCH 2122/2290] tools/power/turbostat: Fix uncore frequency file string [ Upstream commit 60add818ab2543b7e4f2bfeaacf2504743c1eb50 ] Running turbostat on a 16 socket HPE Scale-up Compute 3200 (SapphireRapids) fails with: turbostat: /sys/devices/system/cpu/intel_uncore_frequency/package_010_die_00/current_freq_khz: open failed: No such file or directory We observe the sysfs uncore frequency directories named: ... package_09_die_00/ package_10_die_00/ package_11_die_00/ ... package_15_die_00/ The culprit is an incorrect sprintf format string "package_0%d_die_0%d" used with each instance of reading uncore frequency files. uncore-frequency-common.c creates the sysfs directory with the format "package_%02d_die_%02d". Once the package value reaches double digits, the formats diverge. Change each instance of "package_0%d_die_0%d" to "package_%02d_die_%02d". [lenb: deleted the probe part of this patch, as it was already fixed] Signed-off-by: Justin Ernst Reviewed-by: Thomas Renninger Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a674500e7e63d..a41bad8e653bb 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1969,7 +1969,7 @@ unsigned long long get_uncore_mhz(int package, int die) { char path[128]; - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package, + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die); return (snapshot_sysfs_counter(path) / 1000); -- GitLab From 45289683c7eb3c0950685cea9d344dc22b309baa Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 21 Mar 2024 17:46:36 +0530 Subject: [PATCH 2123/2290] drm/amdgpu: Refine IB schedule error logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4b18a91faf1752f9bd69a4ed3aed2c8f6e5b0528 ] Downgrade to debug information when IBs are skipped. Also, use dev_* to identify the device. Signed-off-by: Lijo Lazar Reviewed-by: Christian König Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 3bf0e893c07df..f34bc9bb7045a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -301,12 +301,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { - DRM_INFO("Skip scheduling IBs!\n"); + dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)", + ring->name); } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, + "Error scheduling IBs (%d) in ring(%s)", r, + ring->name); } job->job_run_counter++; -- GitLab From 1f3484dec916a3c4f43c4c44bad398bc24373110 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 9 Apr 2024 13:22:12 -0700 Subject: [PATCH 2124/2290] selftests: timers: Fix valid-adjtimex signed left-shift undefined behavior [ Upstream commit 076361362122a6d8a4c45f172ced5576b2d4a50d ] The struct adjtimex freq field takes a signed value who's units are in shifted (<<16) parts-per-million. Unfortunately for negative adjustments, the straightforward use of: freq = ppm << 16 trips undefined behavior warnings with clang: valid-adjtimex.c:66:6: warning: shifting a negative signed value is undefined [-Wshift-negative-value] -499<<16, ~~~~^ valid-adjtimex.c:67:6: warning: shifting a negative signed value is undefined [-Wshift-negative-value] -450<<16, ~~~~^ .. Fix it by using a multiply by (1 << 16) instead of shifting negative values in the valid-adjtimex test case. Align the values for better readability. Reported-by: Lee Jones Reported-by: Muhammad Usama Anjum Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240409202222.2830476-1-jstultz@google.com Link: https://lore.kernel.org/lkml/0c6d4f0d-2064-4444-986b-1d1ed782135f@collabora.com/ Signed-off-by: Sasha Levin --- .../testing/selftests/timers/valid-adjtimex.c | 73 +++++++++---------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 48b9a803235a8..d13ebde203221 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -21,9 +21,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ - - - #include #include #include @@ -62,45 +59,47 @@ int clear_time_state(void) #define NUM_FREQ_OUTOFRANGE 4 #define NUM_FREQ_INVALID 2 +#define SHIFTED_PPM (1 << 16) + long valid_freq[NUM_FREQ_VALID] = { - -499<<16, - -450<<16, - -400<<16, - -350<<16, - -300<<16, - -250<<16, - -200<<16, - -150<<16, - -100<<16, - -75<<16, - -50<<16, - -25<<16, - -10<<16, - -5<<16, - -1<<16, + -499 * SHIFTED_PPM, + -450 * SHIFTED_PPM, + -400 * SHIFTED_PPM, + -350 * SHIFTED_PPM, + -300 * SHIFTED_PPM, + -250 * SHIFTED_PPM, + -200 * SHIFTED_PPM, + -150 * SHIFTED_PPM, + -100 * SHIFTED_PPM, + -75 * SHIFTED_PPM, + -50 * SHIFTED_PPM, + -25 * SHIFTED_PPM, + -10 * SHIFTED_PPM, + -5 * SHIFTED_PPM, + -1 * SHIFTED_PPM, -1000, - 1<<16, - 5<<16, - 10<<16, - 25<<16, - 50<<16, - 75<<16, - 100<<16, - 150<<16, - 200<<16, - 250<<16, - 300<<16, - 350<<16, - 400<<16, - 450<<16, - 499<<16, + 1 * SHIFTED_PPM, + 5 * SHIFTED_PPM, + 10 * SHIFTED_PPM, + 25 * SHIFTED_PPM, + 50 * SHIFTED_PPM, + 75 * SHIFTED_PPM, + 100 * SHIFTED_PPM, + 150 * SHIFTED_PPM, + 200 * SHIFTED_PPM, + 250 * SHIFTED_PPM, + 300 * SHIFTED_PPM, + 350 * SHIFTED_PPM, + 400 * SHIFTED_PPM, + 450 * SHIFTED_PPM, + 499 * SHIFTED_PPM, }; long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { - -1000<<16, - -550<<16, - 550<<16, - 1000<<16, + -1000 * SHIFTED_PPM, + -550 * SHIFTED_PPM, + 550 * SHIFTED_PPM, + 1000 * SHIFTED_PPM, }; #define LONG_MAX (~0UL>>1) -- GitLab From 1999644d95194d4a58d3e80ad04ce19220a01a81 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 11 Mar 2024 09:15:55 -0700 Subject: [PATCH 2125/2290] Drivers: hv: vmbus: Track decrypted status in vmbus_gpadl [ Upstream commit 211f514ebf1ef5de37b1cf6df9d28a56cfd242ca ] In CoCo VMs it is possible for the untrusted host to cause set_memory_encrypted() or set_memory_decrypted() to fail such that an error is returned and the resulting memory is shared. Callers need to take care to handle these errors to avoid returning decrypted (shared) memory to the page allocator, which could lead to functional or security issues. In order to make sure callers of vmbus_establish_gpadl() and vmbus_teardown_gpadl() don't return decrypted/shared pages to allocators, add a field in struct vmbus_gpadl to keep track of the decryption status of the buffers. This will allow the callers to know if they should free or leak the pages. Signed-off-by: Rick Edgecombe Signed-off-by: Michael Kelley Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20240311161558.1310-3-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240311161558.1310-3-mhklinux@outlook.com> Signed-off-by: Sasha Levin --- drivers/hv/channel.c | 25 +++++++++++++++++++++---- include/linux/hyperv.h | 1 + 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56f7e06c673e4..bb5abdcda18f8 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -472,9 +472,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel, (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1); ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo); - if (ret) + if (ret) { + gpadl->decrypted = false; return ret; + } + /* + * Set the "decrypted" flag to true for the set_memory_decrypted() + * success case. In the failure case, the encryption state of the + * memory is unknown. Leave "decrypted" as true to ensure the + * memory will be leaked instead of going back on the free list. + */ + gpadl->decrypted = true; ret = set_memory_decrypted((unsigned long)kbuffer, PFN_UP(size)); if (ret) { @@ -563,9 +572,15 @@ cleanup: kfree(msginfo); - if (ret) - set_memory_encrypted((unsigned long)kbuffer, - PFN_UP(size)); + if (ret) { + /* + * If set_memory_encrypted() fails, the decrypted flag is + * left as true so the memory is leaked instead of being + * put back on the free list. + */ + if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size))) + gpadl->decrypted = false; + } return ret; } @@ -886,6 +901,8 @@ post_msg_err: if (ret) pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret); + gpadl->decrypted = ret; + return ret; } EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 4fbd5d8417111..811d59cf891ba 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -832,6 +832,7 @@ struct vmbus_gpadl { u32 gpadl_handle; u32 size; void *buffer; + bool decrypted; }; struct vmbus_channel { -- GitLab From dabf12bf994318d939f70d47cfda30e47abb2c54 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 11 Mar 2024 09:15:57 -0700 Subject: [PATCH 2126/2290] uio_hv_generic: Don't free decrypted memory [ Upstream commit 3d788b2fbe6a1a1a9e3db09742b90809d51638b7 ] In CoCo VMs it is possible for the untrusted host to cause set_memory_encrypted() or set_memory_decrypted() to fail such that an error is returned and the resulting memory is shared. Callers need to take care to handle these errors to avoid returning decrypted (shared) memory to the page allocator, which could lead to functional or security issues. The VMBus device UIO driver could free decrypted/shared pages if set_memory_decrypted() fails. Check the decrypted field in the gpadl to decide whether to free the memory. Signed-off-by: Rick Edgecombe Signed-off-by: Michael Kelley Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20240311161558.1310-5-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240311161558.1310-5-mhklinux@outlook.com> Signed-off-by: Sasha Levin --- drivers/uio/uio_hv_generic.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index c08a6cfd119f2..e5789dfcaff61 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata) { if (pdata->send_gpadl.gpadl_handle) { vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl); - vfree(pdata->send_buf); + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); } if (pdata->recv_gpadl.gpadl_handle) { vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl); - vfree(pdata->recv_buf); + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); } } @@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_establish_gpadl(channel, pdata->recv_buf, RECV_BUFFER_SIZE, &pdata->recv_gpadl); if (ret) { - vfree(pdata->recv_buf); + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); goto fail_close; } @@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_establish_gpadl(channel, pdata->send_buf, SEND_BUFFER_SIZE, &pdata->send_gpadl); if (ret) { - vfree(pdata->send_buf); + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); goto fail_close; } -- GitLab From 2f622008bf784a9f5dd17baa19223cc2ac30a039 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 11 Mar 2024 09:15:58 -0700 Subject: [PATCH 2127/2290] Drivers: hv: vmbus: Don't free ring buffers that couldn't be re-encrypted [ Upstream commit 30d18df6567be09c1433e81993e35e3da573ac48 ] In CoCo VMs it is possible for the untrusted host to cause set_memory_encrypted() or set_memory_decrypted() to fail such that an error is returned and the resulting memory is shared. Callers need to take care to handle these errors to avoid returning decrypted (shared) memory to the page allocator, which could lead to functional or security issues. The VMBus ring buffer code could free decrypted/shared pages if set_memory_decrypted() fails. Check the decrypted field in the struct vmbus_gpadl for the ring buffers to decide whether to free the memory. Signed-off-by: Michael Kelley Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20240311161558.1310-6-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240311161558.1310-6-mhklinux@outlook.com> Signed-off-by: Sasha Levin --- drivers/hv/channel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index bb5abdcda18f8..47e1bd8de9fcf 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel) hv_ringbuffer_cleanup(&channel->inbound); if (channel->ringbuffer_page) { - __free_pages(channel->ringbuffer_page, + /* In a CoCo VM leak the memory if it didn't get re-encrypted */ + if (!channel->ringbuffer_gpadlhandle.decrypted) + __free_pages(channel->ringbuffer_page, get_order(channel->ringbuffer_pagecount << PAGE_SHIFT)); channel->ringbuffer_page = NULL; -- GitLab From 3aba6c4ec526ecb6785b2dfa9f2977e7ba99b48c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 10 Apr 2024 18:41:09 +0200 Subject: [PATCH 2128/2290] iommu: mtk: fix module autoloading [ Upstream commit 7537e31df80cb58c27f3b6fef702534ea87a5957 ] Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from of_device_id table. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240410164109.233308-1-krzk@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/mtk_iommu.c | 1 + drivers/iommu/mtk_iommu_v1.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 0ba2a63a9538a..576163f88a4a5 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1570,6 +1570,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8195-iommu-vpp", .data = &mt8195_data_vpp}, {} }; +MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids); static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a978220eb620e..5dd06bcb507f6 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -602,6 +602,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = { { .compatible = "mediatek,mt2701-m4u", }, {} }; +MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids); static const struct component_master_ops mtk_iommu_v1_com_ops = { .bind = mtk_iommu_v1_bind, -- GitLab From ca9b5c81f0c918c63d73d962ed8a8e231f840bc8 Mon Sep 17 00:00:00 2001 From: Joakim Sindholt Date: Mon, 18 Mar 2024 12:22:31 +0100 Subject: [PATCH 2129/2290] fs/9p: only translate RWX permissions for plain 9P2000 [ Upstream commit cd25e15e57e68a6b18dc9323047fe9c68b99290b ] Garbage in plain 9P2000's perm bits is allowed through, which causes it to be able to set (among others) the suid bit. This was presumably not the intent since the unix extended bits are handled explicitly and conditionally on .u. Signed-off-by: Joakim Sindholt Signed-off-by: Eric Van Hensbergen Signed-off-by: Sasha Levin --- fs/9p/vfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 5e2657c1dbbe6..a0c5a372dcf62 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -85,7 +85,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, int res; int mode = stat->mode; - res = mode & S_IALLUGO; + res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; -- GitLab From 5d74f4d80ad9b8f4fb72c03b05e8617fb8d64ec5 Mon Sep 17 00:00:00 2001 From: Joakim Sindholt Date: Mon, 18 Mar 2024 12:22:33 +0100 Subject: [PATCH 2130/2290] fs/9p: translate O_TRUNC into OTRUNC [ Upstream commit 87de39e70503e04ddb58965520b15eb9efa7eef3 ] This one hits both 9P2000 and .u as it appears v9fs has never translated the O_TRUNC flag. Signed-off-by: Joakim Sindholt Signed-off-by: Eric Van Hensbergen Signed-off-by: Sasha Levin --- fs/9p/vfs_inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index a0c5a372dcf62..8f287009545c9 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -181,6 +181,9 @@ int v9fs_uflags2omode(int uflags, int extended) break; } + if (uflags & O_TRUNC) + ret |= P9_OTRUNC; + if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; -- GitLab From f3b0226fc9f49a1b953b59581c714246378e0964 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 19 Mar 2024 12:34:45 -0400 Subject: [PATCH 2131/2290] 9p: explicitly deny setlease attempts [ Upstream commit 7a84602297d36617dbdadeba55a2567031e5165b ] 9p is a remote network protocol, and it doesn't support asynchronous notifications from the server. Ensure that we don't hand out any leases since we can't guarantee they'll be broken when a file's contents change. Signed-off-by: Jeff Layton Signed-off-by: Eric Van Hensbergen Signed-off-by: Sasha Levin --- fs/9p/vfs_file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index aec43ba837992..87222067fe5de 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -667,6 +667,7 @@ const struct file_operations v9fs_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, + .setlease = simple_nosetlease, }; const struct file_operations v9fs_file_operations_dotl = { @@ -708,4 +709,5 @@ const struct file_operations v9fs_mmap_file_operations_dotl = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, + .setlease = simple_nosetlease, }; -- GitLab From 8ebcd16238229f3279c09d7f6554cc46a23b6b49 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Apr 2024 19:25:21 +0300 Subject: [PATCH 2132/2290] gpio: wcove: Use -ENOTSUPP consistently [ Upstream commit 0c3b532ad3fbf82884a2e7e83e37c7dcdd4d1d99 ] The GPIO library expects the drivers to return -ENOTSUPP in some cases and not using analogue POSIX code. Make the driver to follow this. Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/gpio/gpio-wcove.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c index c18b6b47384f1..94ca9d03c0949 100644 --- a/drivers/gpio/gpio-wcove.c +++ b/drivers/gpio/gpio-wcove.c @@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type) unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE; if (gpio >= WCOVE_GPIO_NUM) - return -EOPNOTSUPP; + return -ENOTSUPP; return reg + gpio; } -- GitLab From e4e82ef35a7cac0ce22f304f6f090e960e06090c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Apr 2024 19:26:22 +0300 Subject: [PATCH 2133/2290] gpio: crystalcove: Use -ENOTSUPP consistently [ Upstream commit ace0ebe5c98d66889f19e0f30e2518d0c58d0e04 ] The GPIO library expects the drivers to return -ENOTSUPP in some cases and not using analogue POSIX code. Make the driver to follow this. Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/gpio/gpio-crystalcove.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 1ee62cd58582b..25db014494a4d 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -92,7 +92,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type) case 0x5e: return GPIOPANELCTL; default: - return -EOPNOTSUPP; + return -ENOTSUPP; } } -- GitLab From d063d13af7dcddab2ee48b390b12d517de965855 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:56 -0700 Subject: [PATCH 2134/2290] clk: Don't hold prepare_lock when calling kref_put() [ Upstream commit 6f63af7511e7058f3fa4ad5b8102210741c9f947 ] We don't need to hold the prepare_lock when dropping a ref on a struct clk_core. The release function is only freeing memory and any code with a pointer reference has already unlinked anything pointing to the clk_core. This reduces the holding area of the prepare_lock a bit. Note that we also don't call free_clk() with the prepare_lock held. There isn't any reason to do that. Reviewed-by: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-3-sboyd@kernel.org Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index fe1d45eac837c..8ecbb8f494655 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4435,7 +4435,8 @@ void clk_unregister(struct clk *clk) if (ops == &clk_nodrv_ops) { pr_err("%s: unregistered clock: %s\n", __func__, clk->core->name); - goto unlock; + clk_prepare_unlock(); + return; } /* * Assign empty clock ops for consumers that might still hold @@ -4469,11 +4470,10 @@ void clk_unregister(struct clk *clk) if (clk->core->protect_count) pr_warn("%s: unregistering protected clock: %s\n", __func__, clk->core->name); + clk_prepare_unlock(); kref_put(&clk->core->ref, __clk_release); free_clk(clk); -unlock: - clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_unregister); @@ -4632,13 +4632,11 @@ void __clk_put(struct clk *clk) if (clk->min_rate > 0 || clk->max_rate < ULONG_MAX) clk_set_rate_range_nolock(clk, 0, ULONG_MAX); - owner = clk->core->owner; - kref_put(&clk->core->ref, __clk_release); - clk_prepare_unlock(); + owner = clk->core->owner; + kref_put(&clk->core->ref, __clk_release); module_put(owner); - free_clk(clk); } -- GitLab From ba2adb442277de5890b7b416e4059d1a503be6d6 Mon Sep 17 00:00:00 2001 From: Joakim Sindholt Date: Mon, 18 Mar 2024 12:22:32 +0100 Subject: [PATCH 2135/2290] fs/9p: drop inodes immediately on non-.L too [ Upstream commit 7fd524b9bd1be210fe79035800f4bd78a41b349f ] Signed-off-by: Joakim Sindholt Signed-off-by: Eric Van Hensbergen Signed-off-by: Sasha Levin --- fs/9p/vfs_super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 2d9ee073d12c3..7c35347f1d9be 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -342,6 +342,7 @@ static const struct super_operations v9fs_super_ops = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = simple_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, -- GitLab From 04fa2cfc26fb94fe8a2854057b3af9f5289ed299 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 4 Apr 2024 19:35:54 -0400 Subject: [PATCH 2136/2290] drm/nouveau/dp: Don't probe eDP ports twice harder [ Upstream commit bf52d7f9b2067f02efe7e32697479097aba4a055 ] I didn't pay close enough attention the last time I tried to fix this problem - while we currently do correctly take care to make sure we don't probe a connected eDP port more then once, we don't do the same thing for eDP ports we found to be disconnected. So, fix this and make sure we only ever probe eDP ports once and then leave them at that connector state forever (since without HPD, it's not going to change on its own anyway). This should get rid of the last few GSP errors getting spit out during runtime suspend and resume on some machines, as we tried to reprobe eDP ports in response to ACPI hotplug probe events. Signed-off-by: Lyude Paul Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240404233736.7946-3-lyude@redhat.com (cherry picked from commit fe6660b661c3397af0867d5d098f5b26581f1290) Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_dp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index 53185746fb3d1..17e1e23a780e0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -109,12 +109,15 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector, u8 *dpcd = nv_encoder->dp.dpcd; int ret = NOUVEAU_DP_NONE, hpd; - /* If we've already read the DPCD on an eDP device, we don't need to - * reread it as it won't change + /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we + * haven't probed them once before. */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP && - dpcd[DP_DPCD_REV] != 0) - return NOUVEAU_DP_SST; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if (connector->status == connector_status_connected) + return NOUVEAU_DP_SST; + else if (connector->status == connector_status_disconnected) + return NOUVEAU_DP_NONE; + } mutex_lock(&nv_encoder->dp.hpd_irq_lock); if (mstm) { -- GitLab From 56633a5e15aa0a7b51e1608c064e74bec1ded73a Mon Sep 17 00:00:00 2001 From: Vanillan Wang Date: Tue, 16 Apr 2024 20:07:13 +0800 Subject: [PATCH 2137/2290] net:usb:qmi_wwan: support Rolling modules [ Upstream commit d362046021ea122309da8c8e0b6850c792ca97b5 ] Update the qmi_wwan driver support for the Rolling LTE modules. - VID:PID 33f8:0104, RW101-GL for laptop debug M.2 cards(with RMNET interface for /Linux/Chrome OS) 0x0104: RMNET, diag, at, pipe Here are the outputs of usb-devices: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=33f8 ProdID=0104 Rev=05.04 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=ba2eb033 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Vanillan Wang Link: https://lore.kernel.org/r/20240416120713.24777-1-vanillanwang@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 2d82481d34e6b..45a542659a814 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1429,6 +1429,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ + {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ -- GitLab From 53ce433a6d536947189d70c810c8563b12fbcb64 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 23 Jul 2023 16:21:28 +0200 Subject: [PATCH 2138/2290] kbuild: rust: avoid creating temporary files [ Upstream commit df01b7cfcef08bf3fdcac2909d0e1910781d6bfd ] `rustc` outputs by default the temporary files (i.e. the ones saved by `-Csave-temps`, such as `*.rcgu*` files) in the current working directory when `-o` and `--out-dir` are not given (even if `--emit=x=path` is given, i.e. it does not use those for temporaries). Since out-of-tree modules are compiled from the `linux` tree, `rustc` then tries to create them there, which may not be accessible. Thus pass `--out-dir` explicitly, even if it is just for the temporary files. Similarly, do so for Rust host programs too. Reported-by: Raphael Nestler Closes: https://github.com/Rust-for-Linux/linux/issues/1015 Reported-by: Andrea Righi Tested-by: Raphael Nestler # non-hostprogs Tested-by: Andrea Righi # non-hostprogs Fixes: 295d8398c67e ("kbuild: specify output names separately for each emission type from rustc") Cc: stable@vger.kernel.org Signed-off-by: Miguel Ojeda Tested-by: Martin Rodriguez Reboredo Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Makefile.build | 5 ++++- scripts/Makefile.host | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 9ae02542b9389..1827bc1db1e98 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -277,6 +277,9 @@ $(obj)/%.lst: $(src)/%.c FORCE rust_allowed_features := core_ffi_c +# `--out-dir` is required to avoid temporaries being created by `rustc` in the +# current working directory, which may be not accessible in the out-of-tree +# modules case. rust_common_cmd = \ RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \ -Zallow-features=$(rust_allowed_features) \ @@ -285,7 +288,7 @@ rust_common_cmd = \ --extern alloc --extern kernel \ --crate-type rlib -L $(objtree)/rust/ \ --crate-name $(basename $(notdir $@)) \ - --emit=dep-info=$(depfile) + --out-dir $(dir $@) --emit=dep-info=$(depfile) rust_handle_depfile = \ sed -i '/^\#/d' $(depfile) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index d812241144d44..a447c91893de6 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -86,7 +86,11 @@ hostc_flags = -Wp,-MMD,$(depfile) \ hostcxx_flags = -Wp,-MMD,$(depfile) \ $(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \ $(HOSTCXXFLAGS_$(target-stem).o) -hostrust_flags = --emit=dep-info=$(depfile) \ + +# `--out-dir` is required to avoid temporaries being created by `rustc` in the +# current working directory, which may be not accessible in the out-of-tree +# modules case. +hostrust_flags = --out-dir $(dir $@) --emit=dep-info=$(depfile) \ $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \ $(HOSTRUSTFLAGS_$(target-stem)) -- GitLab From 2f5e8322cd809adae9d9654425a79f3235768089 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Nov 2022 11:01:27 +0100 Subject: [PATCH 2139/2290] spi: Merge spi_controller.{slave,target}_abort() [ Upstream commit 6c6871cdaef96361f6b79a3e45d451a6475df4d6 ] Mixing SPI slave/target handlers and SPI slave/target controllers using legacy and modern naming does not work well: there are now two different callbacks for aborting a slave/target operation, of which only one is populated, while spi_{slave,target}_abort() check and use only one, which may be the unpopulated one. Fix this by merging the slave/target abort callbacks into a single callback using a union, like is already done for the slave/target flags. Fixes: b8d3b056a78dcc94 ("spi: introduce new helpers with using modern naming") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/809c82d54b85dd87ef7ee69fc93016085be85cec.1667555967.git.geert+renesas@glider.be Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/linux/spi/spi.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6edf8a2962d4a..0ce659d6fcb75 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -676,8 +676,10 @@ struct spi_controller { struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); - int (*slave_abort)(struct spi_controller *ctlr); - int (*target_abort)(struct spi_controller *ctlr); + union { + int (*slave_abort)(struct spi_controller *ctlr); + int (*target_abort)(struct spi_controller *ctlr); + }; /* * These hooks are for drivers that use a generic implementation -- GitLab From cf731a5dce7cd5f0c284fb437f1f46962c80e3b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 8 May 2024 14:00:55 -0700 Subject: [PATCH 2140/2290] perf unwind-libunwind: Fix base address for .eh_frame [ Upstream commit 4fb54994b2360ab5029ee3a959161f6fe6bbb349 ] The base address of a DSO mapping should start at the start of the file. Usually DSOs are mapped from the pgoff 0 so it doesn't matter when it uses the start of the map address. But generated DSOs for JIT codes doesn't start from the 0 so it should subtract the offset to calculate the .eh_frame table offsets correctly. Fixes: dc2cf4ca866f5715 ("perf unwind: Fix segbase for ld.lld linked objects") Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Fangrui Song Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Pablo Galindo Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231212070547.612536-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/unwind-libunwind-local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 81b6bd6e1536a..b276e36e3fb41 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -327,7 +327,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, maps__for_each_entry(ui->thread->maps, map) { if (map->dso == dso && map->start < base_addr) - base_addr = map->start; + base_addr = map->start - map->pgoff; } base_addr -= dso->data.elf_base_addr; /* Address of .eh_frame_hdr */ -- GitLab From f269a8ce52fa8641206d9b9fa73a1326b09506ce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 8 May 2024 14:00:56 -0700 Subject: [PATCH 2141/2290] perf unwind-libdw: Handle JIT-generated DSOs properly [ Upstream commit c966d23a351a33f8a977fd7efbb6f467132f7383 ] Usually DSOs are mapped from the beginning of the file, so the base address of the DSO can be calculated by map->start - map->pgoff. However, JIT DSOs which are generated by `perf inject -j`, are mapped only the code segment. This makes unwind-libdw code confusing and rejects processing unwinds in the JIT DSOs. It should use the map start address as base for them to fix the confusion. Fixes: 1fe627da30331024 ("perf unwind: Take pgoff into account when reporting elf to libdwfl") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Fangrui Song Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Pablo Galindo Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231212070547.612536-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/unwind-libdw.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 94aa40f6e3482..9a7bdc0e14cc2 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -45,6 +45,7 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; + Dwarf_Addr base; /* * Some callers will use al->sym, so we can't just use the * cheaper thread__find_map() here. @@ -57,24 +58,36 @@ static int __report_module(struct addr_location *al, u64 ip, if (!dso) return 0; + /* + * The generated JIT DSO files only map the code segment without + * ELF headers. Since JIT codes used to be packed in a memory + * segment, calculating the base address using pgoff falls info + * a different code in another DSO. So just use the map->start + * directly to pick the correct one. + */ + if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) + base = al->map->start; + else + base = al->map->start - al->map->pgoff; + mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start - al->map->pgoff) - mod = 0; + if (s != base) + mod = NULL; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, - al->map->start - al->map->pgoff, false); + base, false); if (!mod) { char filename[PATH_MAX]; if (dso__build_id_filename(dso, filename, sizeof(filename), false)) mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - al->map->start - al->map->pgoff, false); + base, false); } if (mod) { -- GitLab From bd8f78c71defbcb7a9ed331e7f287507df972b00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Feb 2024 23:58:42 -0500 Subject: [PATCH 2142/2290] qibfs: fix dentry leak [ Upstream commit aa23317d0268b309bb3f0801ddd0d61813ff5afb ] simple_recursive_removal() drops the pinning references to all positives in subtree. For the cases when its argument has been kept alive by the pinning alone that's exactly the right thing to do, but here the argument comes from dcache lookup, that needs to be balanced by explicit dput(). Fixes: e41d237818598 "qib_fs: switch to simple_recursive_removal()" Fucked-up-by: Al Viro Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qib/qib_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a973905afd132..182a89bb24ef4 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -440,6 +440,7 @@ static int remove_device_files(struct super_block *sb, return PTR_ERR(dir); } simple_recursive_removal(dir, NULL); + dput(dir); return 0; } -- GitLab From 179db49d7e1ae25e437f4e2a9c016c87b2ab5a8a Mon Sep 17 00:00:00 2001 From: Paul Davey Date: Tue, 23 Apr 2024 18:00:24 +1200 Subject: [PATCH 2143/2290] xfrm: Preserve vlan tags for transport mode software GRO [ Upstream commit 58fbfecab965014b6e3cc956a76b4a96265a1add ] The software GRO path for esp transport mode uses skb_mac_header_rebuild prior to re-injecting the packet via the xfrm_napi_dev. This only copies skb->mac_len bytes of header which may not be sufficient if the packet contains 802.1Q tags or other VLAN tags. Worse copying only the initial header will leave a packet marked as being VLAN tagged but without the corresponding tag leading to mangling when it is later untagged. The VLAN tags are important when receiving the decrypted esp transport mode packet after GRO processing to ensure it is received on the correct interface. Therefore record the full mac header length in xfrm*_transport_input for later use in corresponding xfrm*_transport_finish to copy the entire mac header when rebuilding the mac header for GRO. The skb->data pointer is left pointing skb->mac_header bytes after the start of the mac header as is expected by the network stack and network and transport header offsets reset to this location. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Paul Davey Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 15 +++++++++++++++ include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_input.c | 6 +++++- net/ipv6/xfrm6_input.c | 6 +++++- net/xfrm/xfrm_input.c | 8 ++++++++ 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d5f888fe0e331..cecd3b6bebb8b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2927,6 +2927,21 @@ static inline void skb_mac_header_rebuild(struct sk_buff *skb) } } +/* Move the full mac header up to current network_header. + * Leaves skb->data pointing at offset skb->mac_len into the mac_header. + * Must be provided the complete mac header length. + */ +static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -full_mac_len); + memmove(skb_mac_header(skb), old_mac, full_mac_len); + __skb_push(skb, full_mac_len - skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9ec6f2e92ad3a..5b9c2c535702c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1032,6 +1032,9 @@ struct xfrm_offload { #define CRYPTO_INVALID_PACKET_SYNTAX 64 #define CRYPTO_INVALID_PROTOCOL 128 + /* Used to keep whole l2 header for transport mode GRO */ + __u32 orig_mac_len; + __u8 proto; __u8 inner_ipproto; }; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 183f6dc372429..f6e90ba50b639 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -61,7 +61,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) ip_send_check(iph); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4156387248e40..8432b50d9ce4c 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -56,7 +56,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d0320e35accbf..4bba890ff3bc0 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -388,11 +388,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); @@ -403,11 +407,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - -- GitLab From ad702338fe423cb1e79745787090317256a98dab Mon Sep 17 00:00:00 2001 From: "Boy.Wu" Date: Mon, 15 Apr 2024 05:21:55 +0100 Subject: [PATCH 2144/2290] ARM: 9381/1: kasan: clear stale stack poison [ Upstream commit c4238686f9093b98bd6245a348bcf059cdce23af ] We found below OOB crash: [ 33.452494] ================================================================== [ 33.453513] BUG: KASAN: stack-out-of-bounds in refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec [ 33.454660] Write of size 164 at addr c1d03d30 by task swapper/0/0 [ 33.455515] [ 33.455767] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 6.1.25-mainline #1 [ 33.456880] Hardware name: Generic DT based system [ 33.457555] unwind_backtrace from show_stack+0x18/0x1c [ 33.458326] show_stack from dump_stack_lvl+0x40/0x4c [ 33.459072] dump_stack_lvl from print_report+0x158/0x4a4 [ 33.459863] print_report from kasan_report+0x9c/0x148 [ 33.460616] kasan_report from kasan_check_range+0x94/0x1a0 [ 33.461424] kasan_check_range from memset+0x20/0x3c [ 33.462157] memset from refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec [ 33.463064] refresh_cpu_vm_stats.constprop.0 from tick_nohz_idle_stop_tick+0x180/0x53c [ 33.464181] tick_nohz_idle_stop_tick from do_idle+0x264/0x354 [ 33.465029] do_idle from cpu_startup_entry+0x20/0x24 [ 33.465769] cpu_startup_entry from rest_init+0xf0/0xf4 [ 33.466528] rest_init from arch_post_acpi_subsys_init+0x0/0x18 [ 33.467397] [ 33.467644] The buggy address belongs to stack of task swapper/0/0 [ 33.468493] and is located at offset 112 in frame: [ 33.469172] refresh_cpu_vm_stats.constprop.0+0x0/0x2ec [ 33.469917] [ 33.470165] This frame has 2 objects: [ 33.470696] [32, 76) 'global_zone_diff' [ 33.470729] [112, 276) 'global_node_diff' [ 33.471294] [ 33.472095] The buggy address belongs to the physical page: [ 33.472862] page:3cd72da8 refcount:1 mapcount:0 mapping:00000000 index:0x0 pfn:0x41d03 [ 33.473944] flags: 0x1000(reserved|zone=0) [ 33.474565] raw: 00001000 ed741470 ed741470 00000000 00000000 00000000 ffffffff 00000001 [ 33.475656] raw: 00000000 [ 33.476050] page dumped because: kasan: bad access detected [ 33.476816] [ 33.477061] Memory state around the buggy address: [ 33.477732] c1d03c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 33.478630] c1d03c80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 [ 33.479526] >c1d03d00: 00 04 f2 f2 f2 f2 00 00 00 00 00 00 f1 f1 f1 f1 [ 33.480415] ^ [ 33.481195] c1d03d80: 00 00 00 00 00 00 00 00 00 00 04 f3 f3 f3 f3 f3 [ 33.482088] c1d03e00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 33.482978] ================================================================== We find the root cause of this OOB is that arm does not clear stale stack poison in the case of cpuidle. This patch refer to arch/arm64/kernel/sleep.S to resolve this issue. From cited commit [1] that explain the problem Functions which the compiler has instrumented for KASAN place poison on the stack shadow upon entry and remove this poison prior to returning. In the case of cpuidle, CPUs exit the kernel a number of levels deep in C code. Any instrumented functions on this critical path will leave portions of the stack shadow poisoned. If CPUs lose context and return to the kernel via a cold path, we restore a prior context saved in __cpu_suspend_enter are forgotten, and we never remove the poison they placed in the stack shadow area by functions calls between this and the actual exit of the kernel. Thus, (depending on stackframe layout) subsequent calls to instrumented functions may hit this stale poison, resulting in (spurious) KASAN splats to the console. To avoid this, clear any stale poison from the idle thread for a CPU prior to bringing a CPU online. From cited commit [2] Extend to check for CONFIG_KASAN_STACK [1] commit 0d97e6d8024c ("arm64: kasan: clear stale stack poison") [2] commit d56a9ef84bd0 ("kasan, arm64: unpoison stack only with CONFIG_KASAN_STACK") Signed-off-by: Boy Wu Reviewed-by: Mark Rutland Acked-by: Andrey Ryabinin Reviewed-by: Linus Walleij Fixes: 5615f69bc209 ("ARM: 9016/2: Initialize the mapping of KASan shadow memory") Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/kernel/sleep.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index a86a1d4f34618..93afd1005b43c 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -127,6 +127,10 @@ cpu_resume_after_mmu: instr_sync #endif bl cpu_init @ restore the und/abt/irq banked regs +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) + mov r0, sp + bl kasan_unpoison_task_stack_below +#endif mov r0, #0 @ return zero on success ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_resume_after_mmu) -- GitLab From 3fe4ef0568a48369b1891395d13ac593b1ba41b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2024 12:54:48 +0000 Subject: [PATCH 2145/2290] tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets [ Upstream commit 94062790aedb505bdda209b10bea47b294d6394f ] TCP_SYN_RECV state is really special, it is only used by cross-syn connections, mostly used by fuzzers. In the following crash [1], syzbot managed to trigger a divide by zero in tcp_rcv_space_adjust() A socket makes the following state transitions, without ever calling tcp_init_transfer(), meaning tcp_init_buffer_space() is also not called. TCP_CLOSE connect() TCP_SYN_SENT TCP_SYN_RECV shutdown() -> tcp_shutdown(sk, SEND_SHUTDOWN) TCP_FIN_WAIT1 To fix this issue, change tcp_shutdown() to not perform a TCP_SYN_RECV -> TCP_FIN_WAIT1 transition, which makes no sense anyway. When tcp_rcv_state_process() later changes socket state from TCP_SYN_RECV to TCP_ESTABLISH, then look at sk->sk_shutdown to finally enter TCP_FIN_WAIT1 state, and send a FIN packet from a sane socket state. This means tcp_send_fin() can now be called from BH context, and must use GFP_ATOMIC allocations. [1] divide error: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 1 PID: 5084 Comm: syz-executor358 Not tainted 6.9.0-rc6-syzkaller-00022-g98369dccd2f8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:tcp_rcv_space_adjust+0x2df/0x890 net/ipv4/tcp_input.c:767 Code: e3 04 4c 01 eb 48 8b 44 24 38 0f b6 04 10 84 c0 49 89 d5 0f 85 a5 03 00 00 41 8b 8e c8 09 00 00 89 e8 29 c8 48 0f af c3 31 d2 <48> f7 f1 48 8d 1c 43 49 8d 96 76 08 00 00 48 89 d0 48 c1 e8 03 48 RSP: 0018:ffffc900031ef3f0 EFLAGS: 00010246 RAX: 0c677a10441f8f42 RBX: 000000004fb95e7e RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000027d4b11f R08: ffffffff89e535a4 R09: 1ffffffff25e6ab7 R10: dffffc0000000000 R11: ffffffff8135e920 R12: ffff88802a9f8d30 R13: dffffc0000000000 R14: ffff88802a9f8d00 R15: 1ffff1100553f2da FS: 00005555775c0380(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1155bf2304 CR3: 000000002b9f2000 CR4: 0000000000350ef0 Call Trace: tcp_recvmsg_locked+0x106d/0x25a0 net/ipv4/tcp.c:2513 tcp_recvmsg+0x25d/0x920 net/ipv4/tcp.c:2578 inet6_recvmsg+0x16a/0x730 net/ipv6/af_inet6.c:680 sock_recvmsg_nosec net/socket.c:1046 [inline] sock_recvmsg+0x109/0x280 net/socket.c:1068 ____sys_recvmsg+0x1db/0x470 net/socket.c:2803 ___sys_recvmsg net/socket.c:2845 [inline] do_recvmmsg+0x474/0xae0 net/socket.c:2939 __sys_recvmmsg net/socket.c:3018 [inline] __do_sys_recvmmsg net/socket.c:3041 [inline] __se_sys_recvmmsg net/socket.c:3034 [inline] __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7faeb6363db9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffcc1997168 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007faeb6363db9 RDX: 0000000000000001 RSI: 0000000020000bc0 RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000001c R10: 0000000000000122 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000001 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240501125448.896529-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_output.c | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f01c0a5d2c37b..3447a09ee83a2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2831,7 +2831,7 @@ void tcp_shutdown(struct sock *sk, int how) /* If we've already sent a FIN, or it's a closed state, skip this. */ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_send_fin(sk); @@ -2940,7 +2940,7 @@ void __tcp_close(struct sock *sk, long timeout) * machine. State transitions: * * TCP_ESTABLISHED -> TCP_FIN_WAIT1 - * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult) * TCP_CLOSE_WAIT -> TCP_LAST_ACK * * are legal only when FIN has been sent (i.e. in window), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 34460c9b37ae2..4c9da94553365 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6597,6 +6597,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); + if (sk->sk_shutdown & SEND_SHUTDOWN) + tcp_shutdown(sk, SEND_SHUTDOWN); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 67087da45a1f7..15f814c1e1693 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3480,7 +3480,9 @@ void tcp_send_fin(struct sock *sk) return; } } else { - skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | + __GFP_NOWARN)); if (unlikely(!skb)) return; -- GitLab From 13ed7cdf079686ccd3618335205700c03f6fb446 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 1 May 2024 14:31:45 -0700 Subject: [PATCH 2146/2290] tcp: Use refcount_inc_not_zero() in tcp_twsk_unique(). [ Upstream commit f2db7230f73a80dbb179deab78f88a7947f0ab7e ] Anderson Nascimento reported a use-after-free splat in tcp_twsk_unique() with nice analysis. Since commit ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance"), inet_twsk_hashdance() sets TIME-WAIT socket's sk_refcnt after putting it into ehash and releasing the bucket lock. Thus, there is a small race window where other threads could try to reuse the port during connect() and call sock_hold() in tcp_twsk_unique() for the TIME-WAIT socket with zero refcnt. If that happens, the refcnt taken by tcp_twsk_unique() is overwritten and sock_put() will cause underflow, triggering a real use-after-free somewhere else. To avoid the use-after-free, we need to use refcount_inc_not_zero() in tcp_twsk_unique() and give up on reusing the port if it returns false. [0]: refcount_t: addition on 0; use-after-free. WARNING: CPU: 0 PID: 1039313 at lib/refcount.c:25 refcount_warn_saturate+0xe5/0x110 CPU: 0 PID: 1039313 Comm: trigger Not tainted 6.8.6-200.fc39.x86_64 #1 Hardware name: VMware, Inc. VMware20,1/440BX Desktop Reference Platform, BIOS VMW201.00V.21805430.B64.2305221830 05/22/2023 RIP: 0010:refcount_warn_saturate+0xe5/0x110 Code: 42 8e ff 0f 0b c3 cc cc cc cc 80 3d aa 13 ea 01 00 0f 85 5e ff ff ff 48 c7 c7 f8 8e b7 82 c6 05 96 13 ea 01 01 e8 7b 42 8e ff <0f> 0b c3 cc cc cc cc 48 c7 c7 50 8f b7 82 c6 05 7a 13 ea 01 01 e8 RSP: 0018:ffffc90006b43b60 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff888009bb3ef0 RCX: 0000000000000027 RDX: ffff88807be218c8 RSI: 0000000000000001 RDI: ffff88807be218c0 RBP: 0000000000069d70 R08: 0000000000000000 R09: ffffc90006b439f0 R10: ffffc90006b439e8 R11: 0000000000000003 R12: ffff8880029ede84 R13: 0000000000004e20 R14: ffffffff84356dc0 R15: ffff888009bb3ef0 FS: 00007f62c10926c0(0000) GS:ffff88807be00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020ccb000 CR3: 000000004628c005 CR4: 0000000000f70ef0 PKRU: 55555554 Call Trace: ? refcount_warn_saturate+0xe5/0x110 ? __warn+0x81/0x130 ? refcount_warn_saturate+0xe5/0x110 ? report_bug+0x171/0x1a0 ? refcount_warn_saturate+0xe5/0x110 ? handle_bug+0x3c/0x80 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? refcount_warn_saturate+0xe5/0x110 tcp_twsk_unique+0x186/0x190 __inet_check_established+0x176/0x2d0 __inet_hash_connect+0x74/0x7d0 ? __pfx___inet_check_established+0x10/0x10 tcp_v4_connect+0x278/0x530 __inet_stream_connect+0x10f/0x3d0 inet_stream_connect+0x3a/0x60 __sys_connect+0xa8/0xd0 __x64_sys_connect+0x18/0x20 do_syscall_64+0x83/0x170 entry_SYSCALL_64_after_hwframe+0x78/0x80 RIP: 0033:0x7f62c11a885d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d a3 45 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007f62c1091e58 EFLAGS: 00000296 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000020ccb004 RCX: 00007f62c11a885d RDX: 0000000000000010 RSI: 0000000020ccb000 RDI: 0000000000000003 RBP: 00007f62c1091e90 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000296 R12: 00007f62c10926c0 R13: ffffffffffffff88 R14: 0000000000000000 R15: 00007ffe237885b0 Fixes: ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance") Reported-by: Anderson Nascimento Closes: https://lore.kernel.org/netdev/37a477a6-d39e-486b-9577-3463f655a6b7@allelesecurity.com/ Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240501213145.62261-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index be2c807eed15d..5dcb969cb5e9c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -153,6 +153,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (tcptw->tw_ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), tcptw->tw_ts_recent_stamp)))) { + /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + * and releasing the bucket lock. + */ + if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) + return 0; + /* In case of repair and re-using TIME-WAIT sockets we still * want to be sure that it is safe as above but honor the * sequence numbers and time stamps set as part of the repair @@ -173,7 +179,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } - sock_hold(sktw); + return 1; } -- GitLab From bfab2c1f7940a232cd519e82fff137e308abfd93 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 25 Apr 2024 22:23:45 +0800 Subject: [PATCH 2147/2290] Bluetooth: Fix use-after-free bugs caused by sco_sock_timeout [ Upstream commit 483bc08181827fc475643272ffb69c533007e546 ] When the sco connection is established and then, the sco socket is releasing, timeout_work will be scheduled to judge whether the sco disconnection is timeout. The sock will be deallocated later, but it is dereferenced again in sco_sock_timeout. As a result, the use-after-free bugs will happen. The root cause is shown below: Cleanup Thread | Worker Thread sco_sock_release | sco_sock_close | __sco_sock_close | sco_sock_set_timer | schedule_delayed_work | sco_sock_kill | (wait a time) sock_put(sk) //FREE | sco_sock_timeout | sock_hold(sk) //USE The KASAN report triggered by POC is shown below: [ 95.890016] ================================================================== [ 95.890496] BUG: KASAN: slab-use-after-free in sco_sock_timeout+0x5e/0x1c0 [ 95.890755] Write of size 4 at addr ffff88800c388080 by task kworker/0:0/7 ... [ 95.890755] Workqueue: events sco_sock_timeout [ 95.890755] Call Trace: [ 95.890755] [ 95.890755] dump_stack_lvl+0x45/0x110 [ 95.890755] print_address_description+0x78/0x390 [ 95.890755] print_report+0x11b/0x250 [ 95.890755] ? __virt_addr_valid+0xbe/0xf0 [ 95.890755] ? sco_sock_timeout+0x5e/0x1c0 [ 95.890755] kasan_report+0x139/0x170 [ 95.890755] ? update_load_avg+0xe5/0x9f0 [ 95.890755] ? sco_sock_timeout+0x5e/0x1c0 [ 95.890755] kasan_check_range+0x2c3/0x2e0 [ 95.890755] sco_sock_timeout+0x5e/0x1c0 [ 95.890755] process_one_work+0x561/0xc50 [ 95.890755] worker_thread+0xab2/0x13c0 [ 95.890755] ? pr_cont_work+0x490/0x490 [ 95.890755] kthread+0x279/0x300 [ 95.890755] ? pr_cont_work+0x490/0x490 [ 95.890755] ? kthread_blkcg+0xa0/0xa0 [ 95.890755] ret_from_fork+0x34/0x60 [ 95.890755] ? kthread_blkcg+0xa0/0xa0 [ 95.890755] ret_from_fork_asm+0x11/0x20 [ 95.890755] [ 95.890755] [ 95.890755] Allocated by task 506: [ 95.890755] kasan_save_track+0x3f/0x70 [ 95.890755] __kasan_kmalloc+0x86/0x90 [ 95.890755] __kmalloc+0x17f/0x360 [ 95.890755] sk_prot_alloc+0xe1/0x1a0 [ 95.890755] sk_alloc+0x31/0x4e0 [ 95.890755] bt_sock_alloc+0x2b/0x2a0 [ 95.890755] sco_sock_create+0xad/0x320 [ 95.890755] bt_sock_create+0x145/0x320 [ 95.890755] __sock_create+0x2e1/0x650 [ 95.890755] __sys_socket+0xd0/0x280 [ 95.890755] __x64_sys_socket+0x75/0x80 [ 95.890755] do_syscall_64+0xc4/0x1b0 [ 95.890755] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 95.890755] [ 95.890755] Freed by task 506: [ 95.890755] kasan_save_track+0x3f/0x70 [ 95.890755] kasan_save_free_info+0x40/0x50 [ 95.890755] poison_slab_object+0x118/0x180 [ 95.890755] __kasan_slab_free+0x12/0x30 [ 95.890755] kfree+0xb2/0x240 [ 95.890755] __sk_destruct+0x317/0x410 [ 95.890755] sco_sock_release+0x232/0x280 [ 95.890755] sock_close+0xb2/0x210 [ 95.890755] __fput+0x37f/0x770 [ 95.890755] task_work_run+0x1ae/0x210 [ 95.890755] get_signal+0xe17/0xf70 [ 95.890755] arch_do_signal_or_restart+0x3f/0x520 [ 95.890755] syscall_exit_to_user_mode+0x55/0x120 [ 95.890755] do_syscall_64+0xd1/0x1b0 [ 95.890755] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 95.890755] [ 95.890755] The buggy address belongs to the object at ffff88800c388000 [ 95.890755] which belongs to the cache kmalloc-1k of size 1024 [ 95.890755] The buggy address is located 128 bytes inside of [ 95.890755] freed 1024-byte region [ffff88800c388000, ffff88800c388400) [ 95.890755] [ 95.890755] The buggy address belongs to the physical page: [ 95.890755] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88800c38a800 pfn:0xc388 [ 95.890755] head: order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 95.890755] anon flags: 0x100000000000840(slab|head|node=0|zone=1) [ 95.890755] page_type: 0xffffffff() [ 95.890755] raw: 0100000000000840 ffff888006842dc0 0000000000000000 0000000000000001 [ 95.890755] raw: ffff88800c38a800 000000000010000a 00000001ffffffff 0000000000000000 [ 95.890755] head: 0100000000000840 ffff888006842dc0 0000000000000000 0000000000000001 [ 95.890755] head: ffff88800c38a800 000000000010000a 00000001ffffffff 0000000000000000 [ 95.890755] head: 0100000000000003 ffffea000030e201 ffffea000030e248 00000000ffffffff [ 95.890755] head: 0000000800000000 0000000000000000 00000000ffffffff 0000000000000000 [ 95.890755] page dumped because: kasan: bad access detected [ 95.890755] [ 95.890755] Memory state around the buggy address: [ 95.890755] ffff88800c387f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 95.890755] ffff88800c388000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] >ffff88800c388080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] ^ [ 95.890755] ffff88800c388100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] ffff88800c388180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] ================================================================== Fix this problem by adding a check protected by sco_conn_lock to judget whether the conn->hcon is null. Because the conn->hcon will be set to null, when the sock is releasing. Fixes: ba316be1b6a0 ("Bluetooth: schedule SCO timeouts with delayed_work") Signed-off-by: Duoming Zhou Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/sco.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4a6bf60f3e7aa..301cf802d32c4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -84,6 +84,10 @@ static void sco_sock_timeout(struct work_struct *work) struct sock *sk; sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } sk = conn->sk; if (sk) sock_hold(sk); -- GitLab From e3880b531b68f98d3941d83f2f6dd11cf4fd6b76 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Tue, 30 Apr 2024 12:20:51 -0400 Subject: [PATCH 2148/2290] Bluetooth: msft: fix slab-use-after-free in msft_do_close() [ Upstream commit 10f9f426ac6e752c8d87bf4346930ba347aaabac ] Tying the msft->data lifetime to hdev by freeing it in hci_release_dev() to fix the following case: [use] msft_do_close() msft = hdev->msft_data; if (!msft) ...(1) <- passed. return; mutex_lock(&msft->filter_lock); ...(4) <- used after freed. [free] msft_unregister() msft = hdev->msft_data; hdev->msft_data = NULL; ...(2) kfree(msft); ...(3) <- msft is freed. ================================================================== BUG: KASAN: slab-use-after-free in __mutex_lock_common kernel/locking/mutex.c:587 [inline] BUG: KASAN: slab-use-after-free in __mutex_lock+0x8f/0xc30 kernel/locking/mutex.c:752 Read of size 8 at addr ffff888106cbbca8 by task kworker/u5:2/309 Fixes: bf6a4e30ffbd ("Bluetooth: disable advertisement filters during suspend") Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 3 +-- net/bluetooth/msft.c | 2 +- net/bluetooth/msft.h | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 02e67ff05b7b4..d6be3cb86598e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2733,8 +2733,6 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_unregister_suspend_notifier(hdev); - msft_unregister(hdev); - hci_dev_do_close(hdev); if (!test_bit(HCI_INIT, &hdev->flags) && @@ -2788,6 +2786,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_discovery_filter_clear(hdev); hci_blocked_keys_clear(hdev); hci_codec_list_clear(&hdev->local_codecs); + msft_release(hdev); hci_dev_unlock(hdev); ida_simple_remove(&hci_index_ida, hdev->id); diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index bee6a4c656be4..076cf8bce4d9e 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -584,7 +584,7 @@ void msft_register(struct hci_dev *hdev) hdev->msft_data = msft; } -void msft_unregister(struct hci_dev *hdev) +void msft_release(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index 2a63205b377b7..fe538e9c91c01 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -14,7 +14,7 @@ bool msft_monitor_supported(struct hci_dev *hdev); void msft_register(struct hci_dev *hdev); -void msft_unregister(struct hci_dev *hdev); +void msft_release(struct hci_dev *hdev); void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb); @@ -35,7 +35,7 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev) } static inline void msft_register(struct hci_dev *hdev) {} -static inline void msft_unregister(struct hci_dev *hdev) {} +static inline void msft_release(struct hci_dev *hdev) {} static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} static inline void msft_vendor_evt(struct hci_dev *hdev, void *data, -- GitLab From 8960ff650aec70485b40771cd8e6e8c4cb467d33 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 2 May 2024 20:57:36 +0800 Subject: [PATCH 2149/2290] Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout [ Upstream commit adf0398cee86643b8eacde95f17d073d022f782c ] There is a race condition between l2cap_chan_timeout() and l2cap_chan_del(). When we use l2cap_chan_del() to delete the channel, the chan->conn will be set to null. But the conn could be dereferenced again in the mutex_lock() of l2cap_chan_timeout(). As a result the null pointer dereference bug will happen. The KASAN report triggered by POC is shown below: [ 472.074580] ================================================================== [ 472.075284] BUG: KASAN: null-ptr-deref in mutex_lock+0x68/0xc0 [ 472.075308] Write of size 8 at addr 0000000000000158 by task kworker/0:0/7 [ 472.075308] [ 472.075308] CPU: 0 PID: 7 Comm: kworker/0:0 Not tainted 6.9.0-rc5-00356-g78c0094a146b #36 [ 472.075308] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu4 [ 472.075308] Workqueue: events l2cap_chan_timeout [ 472.075308] Call Trace: [ 472.075308] [ 472.075308] dump_stack_lvl+0x137/0x1a0 [ 472.075308] print_report+0x101/0x250 [ 472.075308] ? __virt_addr_valid+0x77/0x160 [ 472.075308] ? mutex_lock+0x68/0xc0 [ 472.075308] kasan_report+0x139/0x170 [ 472.075308] ? mutex_lock+0x68/0xc0 [ 472.075308] kasan_check_range+0x2c3/0x2e0 [ 472.075308] mutex_lock+0x68/0xc0 [ 472.075308] l2cap_chan_timeout+0x181/0x300 [ 472.075308] process_one_work+0x5d2/0xe00 [ 472.075308] worker_thread+0xe1d/0x1660 [ 472.075308] ? pr_cont_work+0x5e0/0x5e0 [ 472.075308] kthread+0x2b7/0x350 [ 472.075308] ? pr_cont_work+0x5e0/0x5e0 [ 472.075308] ? kthread_blkcg+0xd0/0xd0 [ 472.075308] ret_from_fork+0x4d/0x80 [ 472.075308] ? kthread_blkcg+0xd0/0xd0 [ 472.075308] ret_from_fork_asm+0x11/0x20 [ 472.075308] [ 472.075308] ================================================================== [ 472.094860] Disabling lock debugging due to kernel taint [ 472.096136] BUG: kernel NULL pointer dereference, address: 0000000000000158 [ 472.096136] #PF: supervisor write access in kernel mode [ 472.096136] #PF: error_code(0x0002) - not-present page [ 472.096136] PGD 0 P4D 0 [ 472.096136] Oops: 0002 [#1] PREEMPT SMP KASAN NOPTI [ 472.096136] CPU: 0 PID: 7 Comm: kworker/0:0 Tainted: G B 6.9.0-rc5-00356-g78c0094a146b #36 [ 472.096136] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu4 [ 472.096136] Workqueue: events l2cap_chan_timeout [ 472.096136] RIP: 0010:mutex_lock+0x88/0xc0 [ 472.096136] Code: be 08 00 00 00 e8 f8 23 1f fd 4c 89 f7 be 08 00 00 00 e8 eb 23 1f fd 42 80 3c 23 00 74 08 48 88 [ 472.096136] RSP: 0018:ffff88800744fc78 EFLAGS: 00000246 [ 472.096136] RAX: 0000000000000000 RBX: 1ffff11000e89f8f RCX: ffffffff8457c865 [ 472.096136] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88800744fc78 [ 472.096136] RBP: 0000000000000158 R08: ffff88800744fc7f R09: 1ffff11000e89f8f [ 472.096136] R10: dffffc0000000000 R11: ffffed1000e89f90 R12: dffffc0000000000 [ 472.096136] R13: 0000000000000158 R14: ffff88800744fc78 R15: ffff888007405a00 [ 472.096136] FS: 0000000000000000(0000) GS:ffff88806d200000(0000) knlGS:0000000000000000 [ 472.096136] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 472.096136] CR2: 0000000000000158 CR3: 000000000da32000 CR4: 00000000000006f0 [ 472.096136] Call Trace: [ 472.096136] [ 472.096136] ? __die_body+0x8d/0xe0 [ 472.096136] ? page_fault_oops+0x6b8/0x9a0 [ 472.096136] ? kernelmode_fixup_or_oops+0x20c/0x2a0 [ 472.096136] ? do_user_addr_fault+0x1027/0x1340 [ 472.096136] ? _printk+0x7a/0xa0 [ 472.096136] ? mutex_lock+0x68/0xc0 [ 472.096136] ? add_taint+0x42/0xd0 [ 472.096136] ? exc_page_fault+0x6a/0x1b0 [ 472.096136] ? asm_exc_page_fault+0x26/0x30 [ 472.096136] ? mutex_lock+0x75/0xc0 [ 472.096136] ? mutex_lock+0x88/0xc0 [ 472.096136] ? mutex_lock+0x75/0xc0 [ 472.096136] l2cap_chan_timeout+0x181/0x300 [ 472.096136] process_one_work+0x5d2/0xe00 [ 472.096136] worker_thread+0xe1d/0x1660 [ 472.096136] ? pr_cont_work+0x5e0/0x5e0 [ 472.096136] kthread+0x2b7/0x350 [ 472.096136] ? pr_cont_work+0x5e0/0x5e0 [ 472.096136] ? kthread_blkcg+0xd0/0xd0 [ 472.096136] ret_from_fork+0x4d/0x80 [ 472.096136] ? kthread_blkcg+0xd0/0xd0 [ 472.096136] ret_from_fork_asm+0x11/0x20 [ 472.096136] [ 472.096136] Modules linked in: [ 472.096136] CR2: 0000000000000158 [ 472.096136] ---[ end trace 0000000000000000 ]--- [ 472.096136] RIP: 0010:mutex_lock+0x88/0xc0 [ 472.096136] Code: be 08 00 00 00 e8 f8 23 1f fd 4c 89 f7 be 08 00 00 00 e8 eb 23 1f fd 42 80 3c 23 00 74 08 48 88 [ 472.096136] RSP: 0018:ffff88800744fc78 EFLAGS: 00000246 [ 472.096136] RAX: 0000000000000000 RBX: 1ffff11000e89f8f RCX: ffffffff8457c865 [ 472.096136] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88800744fc78 [ 472.096136] RBP: 0000000000000158 R08: ffff88800744fc7f R09: 1ffff11000e89f8f [ 472.132932] R10: dffffc0000000000 R11: ffffed1000e89f90 R12: dffffc0000000000 [ 472.132932] R13: 0000000000000158 R14: ffff88800744fc78 R15: ffff888007405a00 [ 472.132932] FS: 0000000000000000(0000) GS:ffff88806d200000(0000) knlGS:0000000000000000 [ 472.132932] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 472.132932] CR2: 0000000000000158 CR3: 000000000da32000 CR4: 00000000000006f0 [ 472.132932] Kernel panic - not syncing: Fatal exception [ 472.132932] Kernel Offset: disabled [ 472.132932] ---[ end Kernel panic - not syncing: Fatal exception ]--- Add a check to judge whether the conn is null in l2cap_chan_timeout() in order to mitigate the bug. Fixes: 3df91ea20e74 ("Bluetooth: Revert to mutexes from RCU list") Signed-off-by: Duoming Zhou Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b4cba55be5ad9..c34011113d4c5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -435,6 +435,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (!conn) + return; + mutex_lock(&conn->chan_lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. -- GitLab From 8a3ff43dcbab7c96f9e8cf2bd1049ab8d6e59545 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 2 May 2024 20:32:59 +0200 Subject: [PATCH 2150/2290] net: ks8851: Queue RX packets in IRQ handler instead of disabling BHs [ Upstream commit e0863634bf9f7cf36291ebb5bfa2d16632f79c49 ] Currently the driver uses local_bh_disable()/local_bh_enable() in its IRQ handler to avoid triggering net_rx_action() softirq on exit from netif_rx(). The net_rx_action() could trigger this driver .start_xmit callback, which is protected by the same lock as the IRQ handler, so calling the .start_xmit from netif_rx() from the IRQ handler critical section protected by the lock could lead to an attempt to claim the already claimed lock, and a hang. The local_bh_disable()/local_bh_enable() approach works only in case the IRQ handler is protected by a spinlock, but does not work if the IRQ handler is protected by mutex, i.e. this works for KS8851 with Parallel bus interface, but not for KS8851 with SPI bus interface. Remove the BH manipulation and instead of calling netif_rx() inside the IRQ handler code protected by the lock, queue all the received SKBs in the IRQ handler into a queue first, and once the IRQ handler exits the critical section protected by the lock, dequeue all the queued SKBs and push them all into netif_rx(). At this point, it is safe to trigger the net_rx_action() softirq, since the netif_rx() call is outside of the lock that protects the IRQ handler. Fixes: be0384bf599c ("net: ks8851: Handle softirqs at the end of IRQ thread to fix hang") Tested-by: Ronald Wahl # KS8851 SPI Signed-off-by: Marek Vasut Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240502183436.117117-1-marex@denx.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_common.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index d4cdf3d4f5525..502518cdb4618 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -234,12 +234,13 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) /** * ks8851_rx_pkts - receive packets from the host * @ks: The device information. + * @rxq: Queue of packets received in this function. * * This is called from the IRQ work queue when the system detects that there * are packets in the receive queue. Find out how many packets there are and * read them from the FIFO. */ -static void ks8851_rx_pkts(struct ks8851_net *ks) +static void ks8851_rx_pkts(struct ks8851_net *ks, struct sk_buff_head *rxq) { struct sk_buff *skb; unsigned rxfc; @@ -299,7 +300,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - __netif_rx(skb); + __skb_queue_tail(rxq, skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; @@ -326,11 +327,11 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) static irqreturn_t ks8851_irq(int irq, void *_ks) { struct ks8851_net *ks = _ks; + struct sk_buff_head rxq; unsigned handled = 0; unsigned long flags; unsigned int status; - - local_bh_disable(); + struct sk_buff *skb; ks8851_lock(ks, &flags); @@ -384,7 +385,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) * from the device so do not bother masking just the RX * from the device. */ - ks8851_rx_pkts(ks); + __skb_queue_head_init(&rxq); + ks8851_rx_pkts(ks, &rxq); } /* if something stopped the rx process, probably due to wanting @@ -408,7 +410,9 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); - local_bh_enable(); + if (status & IRQ_RXI) + while ((skb = __skb_dequeue(&rxq))) + netif_rx(skb); return IRQ_HANDLED; } -- GitLab From 6e4c7193954f4faab92f6e8d88bc5565317b44e7 Mon Sep 17 00:00:00 2001 From: Roded Zats Date: Thu, 2 May 2024 18:57:51 +0300 Subject: [PATCH 2151/2290] rtnetlink: Correct nested IFLA_VF_VLAN_LIST attribute validation [ Upstream commit 1aec77b2bb2ed1db0f5efc61c4c1ca3813307489 ] Each attribute inside a nested IFLA_VF_VLAN_LIST is assumed to be a struct ifla_vf_vlan_info so the size of such attribute needs to be at least of sizeof(struct ifla_vf_vlan_info) which is 14 bytes. The current size validation in do_setvfinfo is against NLA_HDRLEN (4 bytes) which is less than sizeof(struct ifla_vf_vlan_info) so this validation is not enough and a too small attribute might be cast to a struct ifla_vf_vlan_info, this might result in an out of bands read access when accessing the saved (casted) entry in ivvl. Fixes: 79aab093a0b5 ("net: Update API for VF vlan protocol 802.1ad support") Signed-off-by: Roded Zats Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240502155751.75705-1-rzats@paloaltonetworks.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ac379e4590f8d..80169afb888d2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2443,7 +2443,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || - nla_len(attr) < NLA_HDRLEN) { + nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) -- GitLab From 95c5fc883537594b47dd027f0e769cba95cfc20c Mon Sep 17 00:00:00 2001 From: Aleksa Savic Date: Sat, 4 May 2024 11:25:01 +0200 Subject: [PATCH 2152/2290] hwmon: (corsair-cpro) Use a separate buffer for sending commands [ Upstream commit e0cd85dc666cb08e1bd313d560cb4eff4d04219e ] Introduce cmd_buffer, a separate buffer for storing only the command that is sent to the device. Before this separation, the existing buffer was shared for both the command and the report received in ccp_raw_event(), which was copied into it. However, because of hidraw, the raw event parsing may be triggered in the middle of sending a command, resulting in outputting gibberish to the device. Using a separate buffer resolves this. Fixes: 40c3a4454225 ("hwmon: add Corsair Commander Pro driver") Signed-off-by: Aleksa Savic Acked-by: Marius Zachmann Link: https://lore.kernel.org/r/20240504092504.24158-2-savicaleksa83@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/corsair-cpro.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c index fa6aa4fc8b521..0a9cbb556188f 100644 --- a/drivers/hwmon/corsair-cpro.c +++ b/drivers/hwmon/corsair-cpro.c @@ -79,6 +79,7 @@ struct ccp_device { struct device *hwmon_dev; struct completion wait_input_report; struct mutex mutex; /* whenever buffer is used, lock before send_usb_cmd */ + u8 *cmd_buffer; u8 *buffer; int target[6]; DECLARE_BITMAP(temp_cnct, NUM_TEMP_SENSORS); @@ -111,15 +112,15 @@ static int send_usb_cmd(struct ccp_device *ccp, u8 command, u8 byte1, u8 byte2, unsigned long t; int ret; - memset(ccp->buffer, 0x00, OUT_BUFFER_SIZE); - ccp->buffer[0] = command; - ccp->buffer[1] = byte1; - ccp->buffer[2] = byte2; - ccp->buffer[3] = byte3; + memset(ccp->cmd_buffer, 0x00, OUT_BUFFER_SIZE); + ccp->cmd_buffer[0] = command; + ccp->cmd_buffer[1] = byte1; + ccp->cmd_buffer[2] = byte2; + ccp->cmd_buffer[3] = byte3; reinit_completion(&ccp->wait_input_report); - ret = hid_hw_output_report(ccp->hdev, ccp->buffer, OUT_BUFFER_SIZE); + ret = hid_hw_output_report(ccp->hdev, ccp->cmd_buffer, OUT_BUFFER_SIZE); if (ret < 0) return ret; @@ -492,7 +493,11 @@ static int ccp_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!ccp) return -ENOMEM; - ccp->buffer = devm_kmalloc(&hdev->dev, OUT_BUFFER_SIZE, GFP_KERNEL); + ccp->cmd_buffer = devm_kmalloc(&hdev->dev, OUT_BUFFER_SIZE, GFP_KERNEL); + if (!ccp->cmd_buffer) + return -ENOMEM; + + ccp->buffer = devm_kmalloc(&hdev->dev, IN_BUFFER_SIZE, GFP_KERNEL); if (!ccp->buffer) return -ENOMEM; -- GitLab From a6c70251c984b2df0be4946adb894909af2f866c Mon Sep 17 00:00:00 2001 From: Aleksa Savic Date: Sat, 4 May 2024 11:25:02 +0200 Subject: [PATCH 2153/2290] hwmon: (corsair-cpro) Use complete_all() instead of complete() in ccp_raw_event() [ Upstream commit 3a034a7b0715eb51124a5263890b1ed39978ed3a ] In ccp_raw_event(), the ccp->wait_input_report completion is completed once. Since we're waiting for exactly one report in send_usb_cmd(), use complete_all() instead of complete() to mark the completion as spent. Fixes: 40c3a4454225 ("hwmon: add Corsair Commander Pro driver") Signed-off-by: Aleksa Savic Acked-by: Marius Zachmann Link: https://lore.kernel.org/r/20240504092504.24158-3-savicaleksa83@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/corsair-cpro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c index 0a9cbb556188f..543a741fe5473 100644 --- a/drivers/hwmon/corsair-cpro.c +++ b/drivers/hwmon/corsair-cpro.c @@ -140,7 +140,7 @@ static int ccp_raw_event(struct hid_device *hdev, struct hid_report *report, u8 return 0; memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); - complete(&ccp->wait_input_report); + complete_all(&ccp->wait_input_report); return 0; } -- GitLab From 544895ba0231e15b6886f6e9faeee0cc342f0026 Mon Sep 17 00:00:00 2001 From: Aleksa Savic Date: Sat, 4 May 2024 11:25:03 +0200 Subject: [PATCH 2154/2290] hwmon: (corsair-cpro) Protect ccp->wait_input_report with a spinlock [ Upstream commit d02abd57e79469a026213f7f5827a98d909f236a ] Through hidraw, userspace can cause a status report to be sent from the device. The parsing in ccp_raw_event() may happen in parallel to a send_usb_cmd() call (which resets the completion for tracking the report) if it's running on a different CPU where bottom half interrupts are not disabled. Add a spinlock around the complete_all() in ccp_raw_event() and reinit_completion() in send_usb_cmd() to prevent race issues. Fixes: 40c3a4454225 ("hwmon: add Corsair Commander Pro driver") Signed-off-by: Aleksa Savic Acked-by: Marius Zachmann Link: https://lore.kernel.org/r/20240504092504.24158-4-savicaleksa83@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/corsair-cpro.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c index 543a741fe5473..486fb6a8c3566 100644 --- a/drivers/hwmon/corsair-cpro.c +++ b/drivers/hwmon/corsair-cpro.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #define USB_VENDOR_ID_CORSAIR 0x1b1c @@ -77,6 +78,8 @@ struct ccp_device { struct hid_device *hdev; struct device *hwmon_dev; + /* For reinitializing the completion below */ + spinlock_t wait_input_report_lock; struct completion wait_input_report; struct mutex mutex; /* whenever buffer is used, lock before send_usb_cmd */ u8 *cmd_buffer; @@ -118,7 +121,15 @@ static int send_usb_cmd(struct ccp_device *ccp, u8 command, u8 byte1, u8 byte2, ccp->cmd_buffer[2] = byte2; ccp->cmd_buffer[3] = byte3; + /* + * Disable raw event parsing for a moment to safely reinitialize the + * completion. Reinit is done because hidraw could have triggered + * the raw event parsing and marked the ccp->wait_input_report + * completion as done. + */ + spin_lock_bh(&ccp->wait_input_report_lock); reinit_completion(&ccp->wait_input_report); + spin_unlock_bh(&ccp->wait_input_report_lock); ret = hid_hw_output_report(ccp->hdev, ccp->cmd_buffer, OUT_BUFFER_SIZE); if (ret < 0) @@ -136,11 +147,12 @@ static int ccp_raw_event(struct hid_device *hdev, struct hid_report *report, u8 struct ccp_device *ccp = hid_get_drvdata(hdev); /* only copy buffer when requested */ - if (completion_done(&ccp->wait_input_report)) - return 0; - - memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); - complete_all(&ccp->wait_input_report); + spin_lock(&ccp->wait_input_report_lock); + if (!completion_done(&ccp->wait_input_report)) { + memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); + complete_all(&ccp->wait_input_report); + } + spin_unlock(&ccp->wait_input_report_lock); return 0; } @@ -515,7 +527,9 @@ static int ccp_probe(struct hid_device *hdev, const struct hid_device_id *id) ccp->hdev = hdev; hid_set_drvdata(hdev, ccp); + mutex_init(&ccp->mutex); + spin_lock_init(&ccp->wait_input_report_lock); init_completion(&ccp->wait_input_report); hid_device_io_start(hdev); -- GitLab From 728a83160f98ee6b60df0d890141b9b7240182fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 May 2024 16:17:00 +0000 Subject: [PATCH 2155/2290] phonet: fix rtm_phonet_notify() skb allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d8cac8568618dcb8a51af3db1103e8d4cc4aeea7 ] fill_route() stores three components in the skb: - struct rtmsg - RTA_DST (u8) - RTA_OIF (u32) Therefore, rtm_phonet_notify() should use NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4) Fixes: f062f41d0657 ("Phonet: routing table Netlink interface") Signed-off-by: Eric Dumazet Acked-by: Rémi Denis-Courmont Link: https://lore.kernel.org/r/20240502161700.1804476-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/phonet/pn_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 59aebe2968907..dd4c7e9a634fb 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); if (skb == NULL) goto errout; -- GitLab From bd7869634807fb89c8e746015d757f0bef08ad69 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 5 May 2024 20:42:38 +0200 Subject: [PATCH 2156/2290] net: bridge: fix corrupted ethernet header on multicast-to-unicast [ Upstream commit 86b29d830ad69eecff25b22dc96c14c6573718e6 ] The change from skb_copy to pskb_copy unfortunately changed the data copying to omit the ethernet header, since it was pulled before reaching this point. Fix this by calling __skb_push/pull around pskb_copy. Fixes: 59c878cbcdd8 ("net: bridge: fix multicast-to-unicast with fraglist GSO") Signed-off-by: Felix Fietkau Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_forward.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 982e7a9ccc41c..9661698e86e40 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -253,6 +253,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; + struct sk_buff *nskb; if (!should_deliver(p, skb)) return; @@ -261,12 +262,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = pskb_copy(skb, GFP_ATOMIC); - if (!skb) { + __skb_push(skb, ETH_HLEN); + nskb = pskb_copy(skb, GFP_ATOMIC); + __skb_pull(skb, ETH_HLEN); + if (!nskb) { DEV_STATS_INC(dev, tx_dropped); return; } + skb = nskb; + __skb_pull(skb, ETH_HLEN); if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); -- GitLab From 7e3242c139c38e60844638e394c2877b16b396b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 May 2024 16:31:45 +0000 Subject: [PATCH 2157/2290] ipv6: fib6_rules: avoid possible NULL dereference in fib6_rule_action() [ Upstream commit d101291b2681e5ab938554e3e323f7a7ee33e3aa ] syzbot is able to trigger the following crash [1], caused by unsafe ip6_dst_idev() use. Indeed ip6_dst_idev() can return NULL, and must always be checked. [1] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 31648 Comm: syz-executor.0 Not tainted 6.9.0-rc4-next-20240417-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:__fib6_rule_action net/ipv6/fib6_rules.c:237 [inline] RIP: 0010:fib6_rule_action+0x241/0x7b0 net/ipv6/fib6_rules.c:267 Code: 02 00 00 49 8d 9f d8 00 00 00 48 89 d8 48 c1 e8 03 42 80 3c 20 00 74 08 48 89 df e8 f9 32 bf f7 48 8b 1b 48 89 d8 48 c1 e8 03 <42> 80 3c 20 00 74 08 48 89 df e8 e0 32 bf f7 4c 8b 03 48 89 ef 4c RSP: 0018:ffffc9000fc1f2f0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 1a772f98c8186700 RDX: 0000000000000003 RSI: ffffffff8bcac4e0 RDI: ffffffff8c1f9760 RBP: ffff8880673fb980 R08: ffffffff8fac15ef R09: 1ffffffff1f582bd R10: dffffc0000000000 R11: fffffbfff1f582be R12: dffffc0000000000 R13: 0000000000000080 R14: ffff888076509000 R15: ffff88807a029a00 FS: 00007f55e82ca6c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b31d23000 CR3: 0000000022b66000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: fib_rules_lookup+0x62c/0xdb0 net/core/fib_rules.c:317 fib6_rule_lookup+0x1fd/0x790 net/ipv6/fib6_rules.c:108 ip6_route_output_flags_noref net/ipv6/route.c:2637 [inline] ip6_route_output_flags+0x38e/0x610 net/ipv6/route.c:2649 ip6_route_output include/net/ip6_route.h:93 [inline] ip6_dst_lookup_tail+0x189/0x11a0 net/ipv6/ip6_output.c:1120 ip6_dst_lookup_flow+0xb9/0x180 net/ipv6/ip6_output.c:1250 sctp_v6_get_dst+0x792/0x1e20 net/sctp/ipv6.c:326 sctp_transport_route+0x12c/0x2e0 net/sctp/transport.c:455 sctp_assoc_add_peer+0x614/0x15c0 net/sctp/associola.c:662 sctp_connect_new_asoc+0x31d/0x6c0 net/sctp/socket.c:1099 __sctp_connect+0x66d/0xe30 net/sctp/socket.c:1197 sctp_connect net/sctp/socket.c:4819 [inline] sctp_inet_connect+0x149/0x1f0 net/sctp/socket.c:4834 __sys_connect_file net/socket.c:2048 [inline] __sys_connect+0x2df/0x310 net/socket.c:2065 __do_sys_connect net/socket.c:2075 [inline] __se_sys_connect net/socket.c:2072 [inline] __x64_sys_connect+0x7a/0x90 net/socket.c:2072 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 5e5f3f0f8013 ("[IPV6] ADDRCONF: Convert ipv6_get_saddr() to ipv6_dev_get_saddr().") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240507163145.835254-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/fib6_rules.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index be52b18e08a6b..6eeab21512ba9 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -233,8 +233,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, rt = pol_lookup_func(lookup, net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); + + if (!idev) + goto again; err = fib6_rule_saddr(net, rule, flags, flp6, - ip6_dst_idev(&rt->dst)->dev); + idev->dev); if (err == -EAGAIN) goto again; -- GitLab From e2591243ce1cf041bdd6d6eac74f3bd0b143f8df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:37 +0100 Subject: [PATCH 2158/2290] timers: Get rid of del_singleshot_timer_sync() [ Upstream commit 9a5a305686971f4be10c6d7251c8348d74b3e014 ] del_singleshot_timer_sync() used to be an optimization for deleting timers which are not rearmed from the timer callback function. This optimization turned out to be broken and got mapped to del_timer_sync() about 17 years ago. Get rid of the undocumented indirection and use del_timer_sync() directly. No functional change. Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20221123201624.706987932@linutronix.de Stable-dep-of: 4893b8b3ef8d ("hsr: Simplify code for announcing HSR nodes timer setup") Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-dev-common.c | 4 ++-- drivers/staging/wlan-ng/hfa384x_usb.c | 4 ++-- drivers/staging/wlan-ng/prism2usb.c | 6 +++--- include/linux/timer.h | 2 -- kernel/time/timer.c | 2 +- net/sunrpc/xprt.c | 2 +- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index dc4c0a0a51290..30b4c288c1bbc 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -155,7 +155,7 @@ ssize_t tpm_common_read(struct file *file, char __user *buf, out: if (!priv->response_length) { *off = 0; - del_singleshot_timer_sync(&priv->user_read_timer); + del_timer_sync(&priv->user_read_timer); flush_work(&priv->timeout_work); } mutex_unlock(&priv->buffer_mutex); @@ -262,7 +262,7 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait) void tpm_common_release(struct file *file, struct file_priv *priv) { flush_work(&priv->async_work); - del_singleshot_timer_sync(&priv->user_read_timer); + del_timer_sync(&priv->user_read_timer); flush_work(&priv->timeout_work); file->private_data = NULL; priv->response_length = 0; diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index 02fdef7a16c87..c7cd54171d994 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -1116,8 +1116,8 @@ cleanup: if (ctlx == get_active_ctlx(hw)) { spin_unlock_irqrestore(&hw->ctlxq.lock, flags); - del_singleshot_timer_sync(&hw->reqtimer); - del_singleshot_timer_sync(&hw->resptimer); + del_timer_sync(&hw->reqtimer); + del_timer_sync(&hw->resptimer); hw->req_timer_done = 1; hw->resp_timer_done = 1; usb_kill_urb(&hw->ctlx_urb); diff --git a/drivers/staging/wlan-ng/prism2usb.c b/drivers/staging/wlan-ng/prism2usb.c index e13da7fadffff..c13f1699e5a2f 100644 --- a/drivers/staging/wlan-ng/prism2usb.c +++ b/drivers/staging/wlan-ng/prism2usb.c @@ -170,9 +170,9 @@ static void prism2sta_disconnect_usb(struct usb_interface *interface) */ prism2sta_ifstate(wlandev, P80211ENUM_ifstate_disable); - del_singleshot_timer_sync(&hw->throttle); - del_singleshot_timer_sync(&hw->reqtimer); - del_singleshot_timer_sync(&hw->resptimer); + del_timer_sync(&hw->throttle); + del_timer_sync(&hw->reqtimer); + del_timer_sync(&hw->resptimer); /* Unlink all the URBs. This "removes the wheels" * from the entire CTLX handling mechanism. diff --git a/include/linux/timer.h b/include/linux/timer.h index 6d18f04ad7039..551fa467726f0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -198,8 +198,6 @@ static inline int del_timer_sync(struct timer_list *timer) return timer_delete_sync(timer); } -#define del_singleshot_timer_sync(t) del_timer_sync(t) - extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 59469897432bc..f5f938140ceaf 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1963,7 +1963,7 @@ signed long __sched schedule_timeout(signed long timeout) timer_setup_on_stack(&timer.timer, process_timeout, 0); __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); schedule(); - del_singleshot_timer_sync(&timer.timer); + del_timer_sync(&timer.timer); /* Remove the timer from the object tracker */ destroy_timer_on_stack(&timer.timer); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 656cec2083718..ab453ede54f0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1164,7 +1164,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) spin_unlock(&xprt->queue_lock); /* Turn off autodisconnect */ - del_singleshot_timer_sync(&xprt->timer); + del_timer_sync(&xprt->timer); return 0; } -- GitLab From b086d1e82fcd4a815a646e3f4c5fd2b03a817c70 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Nov 2022 21:18:45 +0100 Subject: [PATCH 2159/2290] timers: Rename del_timer() to timer_delete() [ Upstream commit bb663f0f3c396c6d05f6c5eeeea96ced20ff112e ] The timer related functions do not have a strict timer_ prefixed namespace which is really annoying. Rename del_timer() to timer_delete() and provide del_timer() as a wrapper. Document that del_timer() is not for new code. Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Reviewed-by: Steven Rostedt (Google) Reviewed-by: Jacob Keller Reviewed-by: Anna-Maria Behnsen Link: https://lore.kernel.org/r/20221123201625.015535022@linutronix.de Stable-dep-of: 4893b8b3ef8d ("hsr: Simplify code for announcing HSR nodes timer setup") Signed-off-by: Sasha Levin --- include/linux/timer.h | 15 ++++++++++++++- kernel/time/timer.c | 6 +++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 551fa467726f0..e338e173ce8bc 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -169,7 +169,6 @@ static inline int timer_pending(const struct timer_list * timer) } extern void add_timer_on(struct timer_list *timer, int cpu); -extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int timer_reduce(struct timer_list *timer, unsigned long expires); @@ -184,6 +183,7 @@ extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); +extern int timer_delete(struct timer_list *timer); /** * del_timer_sync - Delete a pending timer and wait for a running callback @@ -198,6 +198,19 @@ static inline int del_timer_sync(struct timer_list *timer) return timer_delete_sync(timer); } +/** + * del_timer - Delete a pending timer + * @timer: The timer to be deleted + * + * See timer_delete() for detailed explanation. + * + * Do not use in new code. Use timer_delete() instead. + */ +static inline int del_timer(struct timer_list *timer) +{ + return timer_delete(timer); +} + extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f5f938140ceaf..e09852be4e638 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1255,7 +1255,7 @@ void add_timer_on(struct timer_list *timer, int cpu) EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - Deactivate a timer. + * timer_delete - Deactivate a timer * @timer: The timer to be deactivated * * The function only deactivates a pending timer, but contrary to @@ -1268,7 +1268,7 @@ EXPORT_SYMBOL_GPL(add_timer_on); * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ -int del_timer(struct timer_list *timer) +int timer_delete(struct timer_list *timer) { struct timer_base *base; unsigned long flags; @@ -1284,7 +1284,7 @@ int del_timer(struct timer_list *timer) return ret; } -EXPORT_SYMBOL(del_timer); +EXPORT_SYMBOL(timer_delete); /** * try_to_del_timer_sync - Try to deactivate a timer -- GitLab From ca4e781f15e78ee5edf678c389e612210e293189 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:39 +0000 Subject: [PATCH 2160/2290] net-sysfs: convert dev->operstate reads to lockless ones [ Upstream commit 004d138364fd10dd5ff8ceb54cfdc2d792a7b338 ] operstate_show() can omit dev_base_lock acquisition only to read dev->operstate. Annotate accesses to dev->operstate. Writers still acquire dev_base_lock for mutual exclusion. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Stable-dep-of: 4893b8b3ef8d ("hsr: Simplify code for announcing HSR nodes timer setup") Signed-off-by: Sasha Levin --- net/bridge/br_netlink.c | 3 ++- net/core/link_watch.c | 4 ++-- net/core/net-sysfs.c | 4 +--- net/core/rtnetlink.c | 4 ++-- net/hsr/hsr_device.c | 10 +++++----- net/ipv6/addrconf.c | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d38eff27767dc..e9e5c77ef0f4a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -439,7 +439,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, u32 filter_mask, const struct net_device *dev, bool getlink) { - u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) : + IF_OPER_DOWN; struct nlattr *af = NULL; struct net_bridge *br; struct ifinfomsg *hdr; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index aa6cb1f90966f..13513efcfbfe8 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -53,7 +53,7 @@ static void rfc2863_policy(struct net_device *dev) { unsigned char operstate = default_operstate(dev); - if (operstate == dev->operstate) + if (operstate == READ_ONCE(dev->operstate)) return; write_lock(&dev_base_lock); @@ -73,7 +73,7 @@ static void rfc2863_policy(struct net_device *dev) break; } - dev->operstate = operstate; + WRITE_ONCE(dev->operstate, operstate); write_unlock(&dev_base_lock); } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8409d41405dfe..fdf3308b03350 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -306,11 +306,9 @@ static ssize_t operstate_show(struct device *dev, const struct net_device *netdev = to_net_dev(dev); unsigned char operstate; - read_lock(&dev_base_lock); - operstate = netdev->operstate; + operstate = READ_ONCE(netdev->operstate); if (!netif_running(netdev)) operstate = IF_OPER_DOWN; - read_unlock(&dev_base_lock); if (operstate >= ARRAY_SIZE(operstates)) return -EINVAL; /* should not happen */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 80169afb888d2..1163226c025c1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -876,9 +876,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - if (dev->operstate != operstate) { + if (READ_ONCE(dev->operstate) != operstate) { write_lock(&dev_base_lock); - dev->operstate = operstate; + WRITE_ONCE(dev->operstate, operstate); write_unlock(&dev_base_lock); netdev_state_change(dev); } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 83906d093f0ae..89e694f1c3bd3 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -31,8 +31,8 @@ static bool is_slave_up(struct net_device *dev) static void __hsr_set_operstate(struct net_device *dev, int transition) { write_lock(&dev_base_lock); - if (dev->operstate != transition) { - dev->operstate = transition; + if (READ_ONCE(dev->operstate) != transition) { + WRITE_ONCE(dev->operstate, transition); write_unlock(&dev_base_lock); netdev_state_change(dev); } else { @@ -78,14 +78,14 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) { + if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) { /* Went up */ hsr->announce_count = 0; mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } - if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP) + if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP) /* Went down */ del_timer(&hsr->announce_timer); } @@ -100,7 +100,7 @@ void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) */ - old_operstate = master->dev->operstate; + old_operstate = READ_ONCE(master->dev->operstate); has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); hsr_check_announce(master->dev, old_operstate); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3866deaadbb66..16a9a21f6af19 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5979,7 +5979,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->ifindex != dev_get_iflink(dev) && nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || nla_put_u8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN)) + netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN)) goto nla_put_failure; protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO); if (!protoinfo) -- GitLab From 5136ea7fa50b2518795dae5a5a126773c9e8c2da Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 7 May 2024 13:12:14 +0200 Subject: [PATCH 2161/2290] hsr: Simplify code for announcing HSR nodes timer setup [ Upstream commit 4893b8b3ef8db2b182d1a1bebf6c7acf91405000 ] Up till now the code to start HSR announce timer, which triggers sending supervisory frames, was assuming that hsr_netdev_notify() would be called at least twice for hsrX interface. This was required to have different values for old and current values of network device's operstate. This is problematic for a case where hsrX interface is already in the operational state when hsr_netdev_notify() is called, so timer is not configured to trigger and as a result the hsrX is not sending supervisory frames to HSR ring. This error has been discovered when hsr_ping.sh script was run. To be more specific - for the hsr1 and hsr2 the hsr_netdev_notify() was called at least twice with different IF_OPER_{LOWERDOWN|DOWN|UP} states assigned in hsr_check_carrier_and_operstate(hsr). As a result there was no issue with sending supervisory frames. However, with hsr3, the notify function was called only once with operstate set to IF_OPER_UP and timer responsible for triggering supervisory frames was not fired. The solution is to use netif_oper_up() and netif_running() helper functions to assess if network hsrX device is up. Only then, when the timer is not already pending, it is started. Otherwise it is deactivated. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Lukasz Majewski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240507111214.3519800-1-lukma@denx.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/hsr/hsr_device.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 89e694f1c3bd3..ad75724b69adf 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -71,39 +71,36 @@ static bool hsr_check_carrier(struct hsr_port *master) return false; } -static void hsr_check_announce(struct net_device *hsr_dev, - unsigned char old_operstate) +static void hsr_check_announce(struct net_device *hsr_dev) { struct hsr_priv *hsr; hsr = netdev_priv(hsr_dev); - - if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) { - /* Went up */ - hsr->announce_count = 0; - mod_timer(&hsr->announce_timer, - jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); + if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) { + /* Enable announce timer and start sending supervisory frames */ + if (!timer_pending(&hsr->announce_timer)) { + hsr->announce_count = 0; + mod_timer(&hsr->announce_timer, jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); + } + } else { + /* Deactivate the announce timer */ + timer_delete(&hsr->announce_timer); } - - if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP) - /* Went down */ - del_timer(&hsr->announce_timer); } void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { struct hsr_port *master; - unsigned char old_operstate; bool has_carrier; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) */ - old_operstate = READ_ONCE(master->dev->operstate); has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); - hsr_check_announce(master->dev, old_operstate); + hsr_check_announce(master->dev); } int hsr_get_max_mtu(struct hsr_priv *hsr) -- GitLab From d7ae8e8502d4fe38cc8e0a3613e6d970e221d5f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:26 +0000 Subject: [PATCH 2162/2290] ipv6: annotate data-races around cnf.disable_ipv6 [ Upstream commit d289ab65b89c1d4d88417cb6c03e923f21f95fae ] disable_ipv6 is read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. v2: do not preload net before rtnl_trylock() in addrconf_disable_ipv6() (Jiri) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Stable-dep-of: 4db783d68b9b ("ipv6: prevent NULL dereference in ip6_output()") Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 9 +++++---- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 16a9a21f6af19..22e246ff910ee 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4133,7 +4133,7 @@ static void addrconf_dad_work(struct work_struct *w) if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC */ - idev->cnf.disable_ipv6 = 1; + WRITE_ONCE(idev->cnf.disable_ipv6, 1); pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); @@ -6289,7 +6289,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); - idev->cnf.disable_ipv6 = newf; + + WRITE_ONCE(idev->cnf.disable_ipv6, newf); if (changed) dev_disable_change(idev); } @@ -6306,7 +6307,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->disable_ipv6) { rtnl_unlock(); @@ -6314,7 +6315,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->disable_ipv6) { - net->ipv6.devconf_dflt->disable_ipv6 = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index b8378814532ce..1ba97933c74fb 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { + !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - if (idev && unlikely(idev->cnf.disable_ipv6)) + if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6))) SKB_DR_SET(reason, IPV6DISABLED); goto drop; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e9ae084d038d1..17fe401bc299a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -224,7 +224,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(idev->cnf.disable_ipv6)) { + if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; -- GitLab From ea0cb87402f774b0e1214ffba0f57028b27cf155 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 May 2024 16:18:42 +0000 Subject: [PATCH 2163/2290] ipv6: prevent NULL dereference in ip6_output() [ Upstream commit 4db783d68b9b39a411a96096c10828ff5dfada7a ] According to syzbot, there is a chance that ip6_dst_idev() returns NULL in ip6_output(). Most places in IPv6 stack deal with a NULL idev just fine, but not here. syzbot reported: general protection fault, probably for non-canonical address 0xdffffc00000000bc: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x00000000000005e0-0x00000000000005e7] CPU: 0 PID: 9775 Comm: syz-executor.4 Not tainted 6.9.0-rc5-syzkaller-00157-g6a30653b604a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:ip6_output+0x231/0x3f0 net/ipv6/ip6_output.c:237 Code: 3c 1e 00 49 89 df 74 08 4c 89 ef e8 19 58 db f7 48 8b 44 24 20 49 89 45 00 49 89 c5 48 8d 9d e0 05 00 00 48 89 d8 48 c1 e8 03 <42> 0f b6 04 38 84 c0 4c 8b 74 24 28 0f 85 61 01 00 00 8b 1b 31 ff RSP: 0018:ffffc9000927f0d8 EFLAGS: 00010202 RAX: 00000000000000bc RBX: 00000000000005e0 RCX: 0000000000040000 RDX: ffffc900131f9000 RSI: 0000000000004f47 RDI: 0000000000004f48 RBP: 0000000000000000 R08: ffffffff8a1f0b9a R09: 1ffffffff1f51fad R10: dffffc0000000000 R11: fffffbfff1f51fae R12: ffff8880293ec8c0 R13: ffff88805d7fc000 R14: 1ffff1100527d91a R15: dffffc0000000000 FS: 00007f135c6856c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 0000000064096000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: NF_HOOK include/linux/netfilter.h:314 [inline] ip6_xmit+0xefe/0x17f0 net/ipv6/ip6_output.c:358 sctp_v6_xmit+0x9f2/0x13f0 net/sctp/ipv6.c:248 sctp_packet_transmit+0x26ad/0x2ca0 net/sctp/output.c:653 sctp_packet_singleton+0x22c/0x320 net/sctp/outqueue.c:783 sctp_outq_flush_ctrl net/sctp/outqueue.c:914 [inline] sctp_outq_flush+0x6d5/0x3e20 net/sctp/outqueue.c:1212 sctp_side_effects net/sctp/sm_sideeffect.c:1198 [inline] sctp_do_sm+0x59cc/0x60c0 net/sctp/sm_sideeffect.c:1169 sctp_primitive_ASSOCIATE+0x95/0xc0 net/sctp/primitive.c:73 __sctp_connect+0x9cd/0xe30 net/sctp/socket.c:1234 sctp_connect net/sctp/socket.c:4819 [inline] sctp_inet_connect+0x149/0x1f0 net/sctp/socket.c:4834 __sys_connect_file net/socket.c:2048 [inline] __sys_connect+0x2df/0x310 net/socket.c:2065 __do_sys_connect net/socket.c:2075 [inline] __se_sys_connect net/socket.c:2072 [inline] __x64_sys_connect+0x7a/0x90 net/socket.c:2072 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 778d80be5269 ("ipv6: Add disable_ipv6 sysctl to disable IPv6 operaion on specific interface.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Larysa Zaremba Link: https://lore.kernel.org/r/20240507161842.773961-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 17fe401bc299a..fb26401950e7e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -224,7 +224,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { + if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; -- GitLab From d5a466ab6e78d6f2e0f64435f1e17246c8e941ff Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Tue, 7 May 2024 20:53:31 +0800 Subject: [PATCH 2164/2290] net/smc: fix neighbour and rtable leak in smc_ib_find_route() [ Upstream commit 2ddc0dd7fec86ee53b8928a5cca5fbddd4fc7c06 ] In smc_ib_find_route(), the neighbour found by neigh_lookup() and rtable resolved by ip_route_output_flow() are not released or put before return. It may cause the refcount leak, so fix it. Link: https://lore.kernel.org/r/20240506015439.108739-1-guwen@linux.alibaba.com Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") Signed-off-by: Wen Gu Link: https://lore.kernel.org/r/20240507125331.2808-1-guwen@linux.alibaba.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/smc/smc_ib.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index ace8611735321..6de53431629ca 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -209,13 +209,18 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) - goto out; - neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr); - if (neigh) { - memcpy(nexthop_mac, neigh->ha, ETH_ALEN); - *uses_gateway = rt->rt_uses_gateway; - return 0; - } + goto out_rt; + neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr); + if (!neigh) + goto out_rt; + memcpy(nexthop_mac, neigh->ha, ETH_ALEN); + *uses_gateway = rt->rt_uses_gateway; + neigh_release(neigh); + ip_rt_put(rt); + return 0; + +out_rt: + ip_rt_put(rt); out: return -ENOENT; } -- GitLab From 7d90032f7ca149d61d7f728626dc6633257f7ddf Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:18 +0800 Subject: [PATCH 2165/2290] net: hns3: using user configure after hardware reset [ Upstream commit 05eb60e9648cca0beeebdbcd263b599fb58aee48 ] When a reset occurring, it's supposed to recover user's configuration. Currently, the port info(speed, duplex and autoneg) is stored in hclge_mac and will be scheduled updated. Consider the case that reset was happened consecutively. During the first reset, the port info is configured with a temporary value cause the PHY is reset and looking for best link config. Second reset start and use pervious configuration which is not the user's. The specific process is as follows: +------+ +----+ +----+ | USER | | PF | | HW | +---+--+ +-+--+ +-+--+ | ethtool --reset | | +------------------->| reset command | | ethtool --reset +-------------------->| +------------------->| +---+ | +---+ | | | | |reset currently | | HW RESET | | |and wait to do | | | |<--+ | | | | send pervious cfg |<--+ | | (1000M FULL AN_ON) | | +-------------------->| | | read cfg(time task) | | | (10M HALF AN_OFF) +---+ | |<--------------------+ | cfg take effect | | reset command |<--+ | +-------------------->| | | +---+ | | send pervious cfg | | HW RESET | | (10M HALF AN_OFF) |<--+ | +-------------------->| | | read cfg(time task) | | | (10M HALF AN_OFF) +---+ | |<--------------------+ | cfg take effect | | | | | | read cfg(time task) |<--+ | | (10M HALF AN_OFF) | | |<--------------------+ | | | v v v To avoid aboved situation, this patch introduced req_speed, req_duplex, req_autoneg to store user's configuration and it only be used after hardware reset and to recover user's configuration Fixes: f5f2b3e4dcc0 ("net: hns3: add support for imp-controlled PHYs") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 15 +++++++++------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 +++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9db363fbc34fd..a0ac1748f4ea4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1624,6 +1624,9 @@ static int hclge_configure(struct hclge_dev *hdev) cfg.default_speed, ret); return ret; } + hdev->hw.mac.req_speed = hdev->hw.mac.speed; + hdev->hw.mac.req_autoneg = AUTONEG_ENABLE; + hdev->hw.mac.req_duplex = DUPLEX_FULL; hclge_parse_link_mode(hdev, cfg.speed_ability); @@ -3429,9 +3432,9 @@ hclge_set_phy_link_ksettings(struct hnae3_handle *handle, return ret; } - hdev->hw.mac.autoneg = cmd->base.autoneg; - hdev->hw.mac.speed = cmd->base.speed; - hdev->hw.mac.duplex = cmd->base.duplex; + hdev->hw.mac.req_autoneg = cmd->base.autoneg; + hdev->hw.mac.req_speed = cmd->base.speed; + hdev->hw.mac.req_duplex = cmd->base.duplex; linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising); return 0; @@ -3464,9 +3467,9 @@ static int hclge_tp_port_init(struct hclge_dev *hdev) if (!hnae3_dev_phy_imp_supported(hdev)) return 0; - cmd.base.autoneg = hdev->hw.mac.autoneg; - cmd.base.speed = hdev->hw.mac.speed; - cmd.base.duplex = hdev->hw.mac.duplex; + cmd.base.autoneg = hdev->hw.mac.req_autoneg; + cmd.base.speed = hdev->hw.mac.req_speed; + cmd.base.duplex = hdev->hw.mac.req_duplex; linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising); return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index f6fef790e16c1..5207cb132c33c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -256,11 +256,14 @@ struct hclge_mac { u8 media_type; /* port media type, e.g. fibre/copper/backplane */ u8 mac_addr[ETH_ALEN]; u8 autoneg; + u8 req_autoneg; u8 duplex; + u8 req_duplex; u8 support_autoneg; u8 speed_type; /* 0: sfp speed, 1: active speed */ u8 lane_num; u32 speed; + u32 req_speed; u32 max_speed; u32 speed_ability; /* speed ability supported by current media */ u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */ -- GitLab From 5daf064afee8a7d33457c07a9f377bb28fc88ba8 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 7 May 2024 21:42:19 +0800 Subject: [PATCH 2166/2290] net: hns3: direct return when receive a unknown mailbox message [ Upstream commit 669554c512d2107e2f21616f38e050d40655101f ] Currently, the driver didn't return when receive a unknown mailbox message, and continue checking whether need to generate a response. It's unnecessary and may be incorrect. Fixes: bb5790b71bad ("net: hns3: refactor mailbox response scheme between PF and VF") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 04ff9bf121853..877feee53804f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -1077,12 +1077,13 @@ static void hclge_mbx_request_handling(struct hclge_mbx_ops_param *param) hdev = param->vport->back; cmd_func = hclge_mbx_ops_list[param->req->msg.code]; - if (cmd_func) - ret = cmd_func(param); - else + if (!cmd_func) { dev_err(&hdev->pdev->dev, "un-supported mailbox message, code = %u\n", param->req->msg.code); + return; + } + ret = cmd_func(param); /* PF driver should not reply IMP */ if (hnae3_get_bit(param->req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) && -- GitLab From 549a2179de334e29b54ec90379d7fb745e87920b Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:20 +0800 Subject: [PATCH 2167/2290] net: hns3: change type of numa_node_mask as nodemask_t [ Upstream commit 6639a7b953212ac51aa4baa7d7fb855bf736cf56 ] It provides nodemask_t to describe the numa node mask in kernel. To improve transportability, change the type of numa_node_mask as nodemask_t. Fixes: 38caee9d3ee8 ("net: hns3: Add support of the HNAE3 framework") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 7 ++++--- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index c693bb701ba3e..60b8d61af07f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -873,7 +873,7 @@ struct hnae3_handle { struct hnae3_roce_private_info rinfo; }; - u32 numa_node_mask; /* for multi-chip support */ + nodemask_t numa_node_mask; /* for multi-chip support */ enum hnae3_port_base_vlan_state port_base_vlan_state; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a0ac1748f4ea4..19a0b6c37c909 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1856,7 +1856,8 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) nic->pdev = hdev->pdev; nic->ae_algo = &ae_algo; - nic->numa_node_mask = hdev->numa_node_mask; + bitmap_copy(nic->numa_node_mask.bits, hdev->numa_node_mask.bits, + MAX_NUMNODES); nic->kinfo.io_base = hdev->hw.hw.io_base; ret = hclge_knic_setup(vport, num_tqps, @@ -2548,7 +2549,8 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; - roce->numa_node_mask = nic->numa_node_mask; + bitmap_copy(roce->numa_node_mask.bits, nic->numa_node_mask.bits, + MAX_NUMNODES); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 5207cb132c33c..fd79bb81b6e07 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -875,7 +875,7 @@ struct hclge_dev { u16 fdir_pf_filter_count; /* Num of guaranteed filters for this PF */ u16 num_alloc_vport; /* Num vports this driver supports */ - u32 numa_node_mask; + nodemask_t numa_node_mask; u16 rx_buf_len; u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5a978ea101a90..d26539daf2cba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -464,7 +464,8 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) nic->ae_algo = &ae_algovf; nic->pdev = hdev->pdev; - nic->numa_node_mask = hdev->numa_node_mask; + bitmap_copy(nic->numa_node_mask.bits, hdev->numa_node_mask.bits, + MAX_NUMNODES); nic->flags |= HNAE3_SUPPORT_VF; nic->kinfo.io_base = hdev->hw.hw.io_base; @@ -2136,8 +2137,8 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev) roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; - roce->numa_node_mask = nic->numa_node_mask; - + bitmap_copy(roce->numa_node_mask.bits, nic->numa_node_mask.bits, + MAX_NUMNODES); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index d65ace07b4569..976414d00e67a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -236,7 +236,7 @@ struct hclgevf_dev { u16 rss_size_max; /* HW defined max RSS task queue */ u16 num_alloc_vport; /* num vports this driver supports */ - u32 numa_node_mask; + nodemask_t numa_node_mask; u16 rx_buf_len; u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ -- GitLab From 0bb8751de161ecb5ed99ef441f827426bdc169c3 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:21 +0800 Subject: [PATCH 2168/2290] net: hns3: release PTP resources if pf initialization failed [ Upstream commit 950aa42399893a170d9b57eda0e4a3ff91fd8b70 ] During the PF initialization process, hclge_update_port_info may return an error code for some reason. At this point, the ptp initialization has been completed. To void memory leaks, the resources that are applied by ptp should be released. Therefore, when hclge_update_port_info returns an error code, hclge_ptp_uninit is called to release the corresponding resources. Fixes: eaf83ae59e18 ("net: hns3: add querying fec ability from firmware") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Hariprasad Kelam Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 19a0b6c37c909..75472fde78f17 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11742,7 +11742,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_update_port_info(hdev); if (ret) - goto err_mdiobus_unreg; + goto err_ptp_uninit; INIT_KFIFO(hdev->mac_tnl_log); @@ -11788,6 +11788,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) devl_unlock(hdev->devlink); return 0; +err_ptp_uninit: + hclge_ptp_uninit(hdev); err_mdiobus_unreg: if (hdev->hw.mac.phydev) mdiobus_unregister(hdev->hw.mac.mdio_bus); -- GitLab From 98987f78081917d3b74522ed4566c2acd62cb2a7 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:22 +0800 Subject: [PATCH 2169/2290] net: hns3: use appropriate barrier function after setting a bit value [ Upstream commit 094c281228529d333458208fd02fcac3b139d93b ] There is a memory barrier in followed case. When set the port down, hclgevf_set_timmer will set DOWN in state. Meanwhile, the service task has different behaviour based on whether the state is DOWN. Thus, to make sure service task see DOWN, use smp_mb__after_atomic after calling set_bit(). CPU0 CPU1 ========================== =================================== hclgevf_set_timer_task() hclgevf_periodic_service_task() set_bit(DOWN,state) test_bit(DOWN,state) pf also has this issue. Fixes: ff200099d271 ("net: hns3: remove unnecessary work in hclgevf_main") Fixes: 1c6dfe6fc6f7 ("net: hns3: remove mailbox and reset work in hclge_main") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 75472fde78f17..646546cf25264 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8051,8 +8051,7 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - /* flush memory to make sure DOWN is seen by service task */ - smp_mb__before_atomic(); + smp_mb__after_atomic(); /* flush memory to make sure DOWN is seen by service task */ hclge_flush_link_update(hdev); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index d26539daf2cba..1ecf06345526b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2236,8 +2236,7 @@ static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) } else { set_bit(HCLGEVF_STATE_DOWN, &hdev->state); - /* flush memory to make sure DOWN is seen by service task */ - smp_mb__before_atomic(); + smp_mb__after_atomic(); /* flush memory to make sure DOWN is seen by service task */ hclgevf_flush_link_update(hdev); } } -- GitLab From fa2c7e7646f2740be628e780e8c2be68dd57a12a Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 7 May 2024 21:42:23 +0800 Subject: [PATCH 2170/2290] net: hns3: fix port vlan filter not disabled issue [ Upstream commit f5db7a3b65c84d723ca5e2bb6e83115180ab6336 ] According to hardware limitation, for device support modify VLAN filter state but not support bypass port VLAN filter, it should always disable the port VLAN filter. but the driver enables port VLAN filter when initializing, if there is no VLAN(except VLAN 0) id added, the driver will disable it in service task. In most time, it works fine. But there is a time window before the service task shceduled and net device being registered. So if user adds VLAN at this time, the driver will not update the VLAN filter state, and the port VLAN filter remains enabled. To fix the problem, if support modify VLAN filter state but not support bypass port VLAN filter, set the port vlan filter to "off". Fixes: 184cd221a863 ("net: hns3: disable port VLAN filter when support function level VLAN filter control") Fixes: 2ba306627f59 ("net: hns3: add support for modify VLAN filter state") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 646546cf25264..a18dc73c69894 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10004,6 +10004,7 @@ static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev) static int hclge_init_vlan_filter(struct hclge_dev *hdev) { struct hclge_vport *vport; + bool enable = true; int ret; int i; @@ -10023,8 +10024,12 @@ static int hclge_init_vlan_filter(struct hclge_dev *hdev) vport->cur_vlan_fltr_en = true; } + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, hdev->ae_dev->caps) && + !test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, hdev->ae_dev->caps)) + enable = false; + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, - HCLGE_FILTER_FE_INGRESS, true, 0); + HCLGE_FILTER_FE_INGRESS, enable, 0); } static int hclge_init_vlan_type(struct hclge_dev *hdev) -- GitLab From 72ede790f5a03c3957487400a1b72ebce293a2e7 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 7 May 2024 21:42:24 +0800 Subject: [PATCH 2171/2290] net: hns3: fix kernel crash when devlink reload during initialization [ Upstream commit 35d92abfbad88cf947c010baf34b075e40566095 ] The devlink reload process will access the hardware resources, but the register operation is done before the hardware is initialized. So, processing the devlink reload during initialization may lead to kernel crash. This patch fixes this by registering the devlink after hardware initialization. Fixes: cd6242991d2e ("net: hns3: add support for registering devlink for VF") Fixes: 93305b77ffcb ("net: hns3: fix kernel crash when devlink reload during pf initialization") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 17 +++++------------ .../hisilicon/hns3/hns3vf/hclgevf_main.c | 10 ++++------ 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a18dc73c69894..a2655adc764cd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11609,16 +11609,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto out; - ret = hclge_devlink_init(hdev); - if (ret) - goto err_pci_uninit; - - devl_lock(hdev->devlink); - /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) - goto err_devlink_uninit; + goto err_pci_uninit; /* Firmware command initialize */ ret = hclge_comm_cmd_init(hdev->ae_dev, &hdev->hw.hw, &hdev->fw_version, @@ -11781,6 +11775,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) /* Enable MISC vector(vector0) */ hclge_enable_vector(&hdev->misc_vector, true); + ret = hclge_devlink_init(hdev); + if (ret) + goto err_ptp_uninit; + hclge_state_init(hdev); hdev->last_reset_time = jiffies; @@ -11788,8 +11786,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) HCLGE_DRIVER_NAME); hclge_task_schedule(hdev, round_jiffies_relative(HZ)); - - devl_unlock(hdev->devlink); return 0; err_ptp_uninit: @@ -11803,9 +11799,6 @@ err_msi_uninit: pci_free_irq_vectors(pdev); err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); -err_devlink_uninit: - devl_unlock(hdev->devlink); - hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_clear_master(pdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 1ecf06345526b..1f5a27fb309aa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2902,10 +2902,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_devlink_init(hdev); - if (ret) - goto err_devlink_init; - ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) goto err_cmd_queue_init; @@ -2998,6 +2994,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) hclgevf_init_rxd_adv_layout(hdev); + ret = hclgevf_devlink_init(hdev); + if (ret) + goto err_config; + set_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state); hdev->last_reset_time = jiffies; @@ -3017,8 +3017,6 @@ err_misc_irq_init: err_cmd_init: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_cmd_queue_init: - hclgevf_devlink_uninit(hdev); -err_devlink_init: hclgevf_pci_uninit(hdev); clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state); return ret; -- GitLab From be9b56b034cf20b4b4583e0d553b476a70f8de42 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 18:02:53 +0200 Subject: [PATCH 2172/2290] drm/meson: dw-hdmi: power up phy on device init [ Upstream commit 04703bfd7f99c016a823c74712b97f8b5590ce87 ] The phy is not in a useful state right after init. It will become useful, including for auxiliary function such as CEC or ARC, after the first mode is set. This is a problem on systems where the display is using another interface like DSI or CVBS. This change refactor the init and mode change callback to power up the PHY on init and leave only what is necessary for mode changes in the related function. This is enough to fix CEC operation when HDMI display is not enabled. Fixes: 3f68be7d8e96 ("drm/meson: Add support for HDMI encoder and DW-HDMI bridge + PHY") Signed-off-by: Jerome Brunet Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240426160256.3089978-2-jbrunet@baylibre.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240426160256.3089978-2-jbrunet@baylibre.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_dw_hdmi.c | 51 +++++++++------------------ 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index 5cd2b2ebbbd33..f8dd22d6e6c62 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -384,26 +384,6 @@ static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data, drm_mode_is_420_also(display, mode))) mode_is_420 = true; - /* Enable clocks */ - regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL, 0xffff, 0x100); - - /* Bring HDMITX MEM output of power down */ - regmap_update_bits(priv->hhi, HHI_MEM_PD_REG0, 0xff << 8, 0); - - /* Bring out of reset */ - dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_SW_RESET, 0); - - /* Enable internal pixclk, tmds_clk, spdif_clk, i2s_clk, cecclk */ - dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_CLK_CNTL, - 0x3, 0x3); - - /* Enable cec_clk and hdcp22_tmdsclk_en */ - dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_CLK_CNTL, - 0x3 << 4, 0x3 << 4); - - /* Enable normal output to PHY */ - dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12)); - /* TMDS pattern setup */ if (mode->clock > 340000 && !mode_is_420) { dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_01, @@ -425,20 +405,6 @@ static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data, /* Setup PHY parameters */ meson_hdmi_phy_setup_mode(dw_hdmi, mode, mode_is_420); - /* Setup PHY */ - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - 0xffff << 16, 0x0390 << 16); - - /* BIT_INVERT */ - if (dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxl-dw-hdmi") || - dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxm-dw-hdmi") || - dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-g12a-dw-hdmi")) - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - BIT(17), 0); - else - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - BIT(17), BIT(17)); - /* Disable clock, fifo, fifo_wr */ regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, 0xf, 0); @@ -656,6 +622,23 @@ static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi) meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_CLK_CNTL, 0xff); + /* Enable normal output to PHY */ + meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12)); + + /* Setup PHY */ + regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, + 0xffff << 16, 0x0390 << 16); + + /* BIT_INVERT */ + if (dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-gxl-dw-hdmi") || + dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-gxm-dw-hdmi") || + dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-g12a-dw-hdmi")) + regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, + BIT(17), 0); + else + regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, + BIT(17), BIT(17)); + /* Enable HDMI-TX Interrupt */ meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_INTR_STAT_CLR, HDMITX_TOP_INTR_CORE); -- GitLab From 35c614caea4d5586fa3a38344e440d3017a10908 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 18:02:54 +0200 Subject: [PATCH 2173/2290] drm/meson: dw-hdmi: add bandgap setting for g12 [ Upstream commit 08001033121dd92b8297a5b7333636b466c30f13 ] When no mode is set, the utility pin appears to be grounded. No signal is getting through. This is problematic because ARC and eARC use this line and may do so even if no display mode is set. This change enable the bandgap setting on g12 chip, which fix the problem with the utility pin. This is done by restoring init values on PHY init and disable. Fixes: 3b7c1237a72a ("drm/meson: Add G12A support for the DW-HDMI Glue") Signed-off-by: Jerome Brunet Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240426160256.3089978-3-jbrunet@baylibre.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240426160256.3089978-3-jbrunet@baylibre.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_dw_hdmi.c | 43 ++++++++++++++++----------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index f8dd22d6e6c62..2c8e978eb9ab9 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -105,6 +105,8 @@ #define HHI_HDMI_CLK_CNTL 0x1cc /* 0x73 */ #define HHI_HDMI_PHY_CNTL0 0x3a0 /* 0xe8 */ #define HHI_HDMI_PHY_CNTL1 0x3a4 /* 0xe9 */ +#define PHY_CNTL1_INIT 0x03900000 +#define PHY_INVERT BIT(17) #define HHI_HDMI_PHY_CNTL2 0x3a8 /* 0xea */ #define HHI_HDMI_PHY_CNTL3 0x3ac /* 0xeb */ #define HHI_HDMI_PHY_CNTL4 0x3b0 /* 0xec */ @@ -129,6 +131,8 @@ struct meson_dw_hdmi_data { unsigned int addr); void (*dwc_write)(struct meson_dw_hdmi *dw_hdmi, unsigned int addr, unsigned int data); + u32 cntl0_init; + u32 cntl1_init; }; struct meson_dw_hdmi { @@ -458,7 +462,9 @@ static void dw_hdmi_phy_disable(struct dw_hdmi *hdmi, DRM_DEBUG_DRIVER("\n"); - regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0); + /* Fallback to init mode */ + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1, dw_hdmi->data->cntl1_init); + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, dw_hdmi->data->cntl0_init); } static enum drm_connector_status dw_hdmi_read_hpd(struct dw_hdmi *hdmi, @@ -576,11 +582,22 @@ static const struct regmap_config meson_dw_hdmi_regmap_config = { .fast_io = true, }; -static const struct meson_dw_hdmi_data meson_dw_hdmi_gx_data = { +static const struct meson_dw_hdmi_data meson_dw_hdmi_gxbb_data = { .top_read = dw_hdmi_top_read, .top_write = dw_hdmi_top_write, .dwc_read = dw_hdmi_dwc_read, .dwc_write = dw_hdmi_dwc_write, + .cntl0_init = 0x0, + .cntl1_init = PHY_CNTL1_INIT | PHY_INVERT, +}; + +static const struct meson_dw_hdmi_data meson_dw_hdmi_gxl_data = { + .top_read = dw_hdmi_top_read, + .top_write = dw_hdmi_top_write, + .dwc_read = dw_hdmi_dwc_read, + .dwc_write = dw_hdmi_dwc_write, + .cntl0_init = 0x0, + .cntl1_init = PHY_CNTL1_INIT, }; static const struct meson_dw_hdmi_data meson_dw_hdmi_g12a_data = { @@ -588,6 +605,8 @@ static const struct meson_dw_hdmi_data meson_dw_hdmi_g12a_data = { .top_write = dw_hdmi_g12a_top_write, .dwc_read = dw_hdmi_g12a_dwc_read, .dwc_write = dw_hdmi_g12a_dwc_write, + .cntl0_init = 0x000b4242, /* Bandgap */ + .cntl1_init = PHY_CNTL1_INIT, }; static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi) @@ -626,18 +645,8 @@ static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi) meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12)); /* Setup PHY */ - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - 0xffff << 16, 0x0390 << 16); - - /* BIT_INVERT */ - if (dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-gxl-dw-hdmi") || - dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-gxm-dw-hdmi") || - dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-g12a-dw-hdmi")) - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - BIT(17), 0); - else - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - BIT(17), BIT(17)); + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1, meson_dw_hdmi->data->cntl1_init); + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, meson_dw_hdmi->data->cntl0_init); /* Enable HDMI-TX Interrupt */ meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_INTR_STAT_CLR, @@ -866,11 +875,11 @@ static const struct dev_pm_ops meson_dw_hdmi_pm_ops = { static const struct of_device_id meson_dw_hdmi_of_table[] = { { .compatible = "amlogic,meson-gxbb-dw-hdmi", - .data = &meson_dw_hdmi_gx_data }, + .data = &meson_dw_hdmi_gxbb_data }, { .compatible = "amlogic,meson-gxl-dw-hdmi", - .data = &meson_dw_hdmi_gx_data }, + .data = &meson_dw_hdmi_gxl_data }, { .compatible = "amlogic,meson-gxm-dw-hdmi", - .data = &meson_dw_hdmi_gx_data }, + .data = &meson_dw_hdmi_gxl_data }, { .compatible = "amlogic,meson-g12a-dw-hdmi", .data = &meson_dw_hdmi_g12a_data }, { } -- GitLab From 58cf43f758451933c4b3ccf2b8e98c53ff0ea1bf Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 2 May 2024 15:32:35 -0700 Subject: [PATCH 2174/2290] drm/connector: Add \n to message about demoting connector force-probes [ Upstream commit 6897204ea3df808d342c8e4613135728bc538bcd ] The debug print clearly lacks a \n at the end. Add it. Fixes: 8f86c82aba8b ("drm/connector: demote connector force-probes for non-master clients") Reviewed-by: Abhinav Kumar Reviewed-by: Simon Ser Reviewed-by: Dmitry Baryshkov Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240502153234.1.I2052f01c8d209d9ae9c300b87c6e4f60bd3cc99e@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 27de2a97f1d11..3d18d840ef3b6 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -2707,7 +2707,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, dev->mode_config.max_width, dev->mode_config.max_height); else - drm_dbg_kms(dev, "User-space requested a forced probe on [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe", + drm_dbg_kms(dev, "User-space requested a forced probe on [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe\n", connector->base.id, connector->name); } -- GitLab From 12bb8b6a2eeaa0dc0f12fc841efdd061c6899f67 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 May 2024 13:32:17 -0500 Subject: [PATCH 2175/2290] dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users [ Upstream commit cd94d1b182d2986378550c9087571991bfee01d4 ] Limit the workaround introduced by commit 31729e8c21ec ("drm/amd/pm: fixes a random hang in S4 for SMU v13.0.4/11") to only run in the s4 path. Cc: Tim Huang Fixes: 31729e8c21ec ("drm/amd/pm: fixes a random hang in S4 for SMU v13.0.4/11") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3351 Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 21b374d121819..5de31961319a2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -222,7 +222,7 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) struct amdgpu_device *adev = smu->adev; int ret = 0; - if (!en && !adev->in_s0ix) { + if (!en && adev->in_s4) { /* Adds a GFX reset as workaround just before sending the * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering * an invalid state. -- GitLab From a35ebde68c77a4103aae286e18378003400c63b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 7 Oct 2022 16:44:44 +0300 Subject: [PATCH 2176/2290] gpiolib: cdev: Add missing header(s) [ Upstream commit 52ee7c02f67808afa533c523fa3e4b66c54ea758 ] Do not imply that some of the generic headers may be always included. Instead, include explicitly what we are direct user of. While at it, sort headers alphabetically. Signed-off-by: Andy Shevchenko Rewiewed-by: Kent Gibson Stable-dep-of: ee0166b637a5 ("gpiolib: cdev: fix uninitialised kfifo") Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-cdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index e40c93f0960b4..d2027212901fd 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -20,11 +21,12 @@ #include #include #include +#include #include #include #include #include -#include + #include #include "gpiolib.h" -- GitLab From 9ed256d2949135e1d6f44fa91750dd7c6be54e00 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Tue, 19 Dec 2023 08:41:54 +0800 Subject: [PATCH 2177/2290] gpiolib: cdev: relocate debounce_period_us from struct gpio_desc [ Upstream commit 9344e34e7992fec95ce6210d95ac01437dd327ab ] Store the debounce period for a requested line locally, rather than in the debounce_period_us field in the gpiolib struct gpio_desc. Add a global tree of lines containing supplemental line information to make the debounce period available to be reported by the GPIO_V2_GET_LINEINFO_IOCTL and the line change notifier. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Stable-dep-of: ee0166b637a5 ("gpiolib: cdev: fix uninitialised kfifo") Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-cdev.c | 165 +++++++++++++++++++++++++++++++----- 1 file changed, 142 insertions(+), 23 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index d2027212901fd..6ee1074d49152 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -465,6 +467,7 @@ out_free_lh: /** * struct line - contains the state of a requested line + * @node: to store the object in supinfo_tree if supplemental * @desc: the GPIO descriptor for this line. * @req: the corresponding line request * @irq: the interrupt triggered in response to events on this GPIO @@ -477,6 +480,7 @@ out_free_lh: * @line_seqno: the seqno for the current edge event in the sequence of * events for this line. * @work: the worker that implements software debouncing + * @debounce_period_us: the debounce period in microseconds * @sw_debounced: flag indicating if the software debouncer is active * @level: the current debounced physical level of the line * @hdesc: the Hardware Timestamp Engine (HTE) descriptor @@ -485,6 +489,7 @@ out_free_lh: * @last_seqno: the last sequence number before debounce period expires */ struct line { + struct rb_node node; struct gpio_desc *desc; /* * -- edge detector specific fields -- @@ -518,6 +523,15 @@ struct line { * -- debouncer specific fields -- */ struct delayed_work work; + /* + * debounce_period_us is accessed by debounce_irq_handler() and + * process_hw_ts() which are disabled when modified by + * debounce_setup(), edge_detector_setup() or edge_detector_stop() + * or can live with a stale version when updated by + * edge_detector_update(). + * The modifying functions are themselves mutually exclusive. + */ + unsigned int debounce_period_us; /* * sw_debounce is accessed by linereq_set_config(), which is the * only setter, and linereq_get_values(), which can live with a @@ -550,6 +564,17 @@ struct line { #endif /* CONFIG_HTE */ }; +/* + * a rbtree of the struct lines containing supplemental info. + * Used to populate gpio_v2_line_info with cdev specific fields not contained + * in the struct gpio_desc. + * A line is determined to contain supplemental information by + * line_has_supinfo(). + */ +static struct rb_root supinfo_tree = RB_ROOT; +/* covers supinfo_tree */ +static DEFINE_SPINLOCK(supinfo_lock); + /** * struct linereq - contains the state of a userspace line request * @gdev: the GPIO device the line request pertains to @@ -562,7 +587,8 @@ struct line { * this line request. Note that this is not used when @num_lines is 1, as * the line_seqno is then the same and is cheaper to calculate. * @config_mutex: mutex for serializing ioctl() calls to ensure consistency - * of configuration, particularly multi-step accesses to desc flags. + * of configuration, particularly multi-step accesses to desc flags and + * changes to supinfo status. * @lines: the lines held by this line request, with @num_lines elements. */ struct linereq { @@ -577,6 +603,103 @@ struct linereq { struct line lines[]; }; +static void supinfo_insert(struct line *line) +{ + struct rb_node **new = &(supinfo_tree.rb_node), *parent = NULL; + struct line *entry; + + guard(spinlock)(&supinfo_lock); + + while (*new) { + entry = container_of(*new, struct line, node); + + parent = *new; + if (line->desc < entry->desc) { + new = &((*new)->rb_left); + } else if (line->desc > entry->desc) { + new = &((*new)->rb_right); + } else { + /* this should never happen */ + WARN(1, "duplicate line inserted"); + return; + } + } + + rb_link_node(&line->node, parent, new); + rb_insert_color(&line->node, &supinfo_tree); +} + +static void supinfo_erase(struct line *line) +{ + guard(spinlock)(&supinfo_lock); + + rb_erase(&line->node, &supinfo_tree); +} + +static struct line *supinfo_find(struct gpio_desc *desc) +{ + struct rb_node *node = supinfo_tree.rb_node; + struct line *line; + + while (node) { + line = container_of(node, struct line, node); + if (desc < line->desc) + node = node->rb_left; + else if (desc > line->desc) + node = node->rb_right; + else + return line; + } + return NULL; +} + +static void supinfo_to_lineinfo(struct gpio_desc *desc, + struct gpio_v2_line_info *info) +{ + struct gpio_v2_line_attribute *attr; + struct line *line; + + guard(spinlock)(&supinfo_lock); + + line = supinfo_find(desc); + if (!line) + return; + + attr = &info->attrs[info->num_attrs]; + attr->id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE; + attr->debounce_period_us = READ_ONCE(line->debounce_period_us); + info->num_attrs++; +} + +static inline bool line_has_supinfo(struct line *line) +{ + return READ_ONCE(line->debounce_period_us); +} + +/* + * Checks line_has_supinfo() before and after the change to avoid unnecessary + * supinfo_tree access. + * Called indirectly by linereq_create() or linereq_set_config() so line + * is already protected from concurrent changes. + */ +static void line_set_debounce_period(struct line *line, + unsigned int debounce_period_us) +{ + bool was_suppl = line_has_supinfo(line); + + WRITE_ONCE(line->debounce_period_us, debounce_period_us); + + /* if supinfo status is unchanged then we're done */ + if (line_has_supinfo(line) == was_suppl) + return; + + /* supinfo status has changed, so update the tree */ + if (was_suppl) + supinfo_erase(line); + else + supinfo_insert(line); +} + #define GPIO_V2_LINE_BIAS_FLAGS \ (GPIO_V2_LINE_FLAG_BIAS_PULL_UP | \ GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN | \ @@ -714,7 +837,7 @@ static enum hte_return process_hw_ts(struct hte_ts_data *ts, void *p) line->total_discard_seq++; line->last_seqno = ts->seq; mod_delayed_work(system_wq, &line->work, - usecs_to_jiffies(READ_ONCE(line->desc->debounce_period_us))); + usecs_to_jiffies(READ_ONCE(line->debounce_period_us))); } else { if (unlikely(ts->seq < line->line_seqno)) return HTE_CB_HANDLED; @@ -855,7 +978,7 @@ static irqreturn_t debounce_irq_handler(int irq, void *p) struct line *line = p; mod_delayed_work(system_wq, &line->work, - usecs_to_jiffies(READ_ONCE(line->desc->debounce_period_us))); + usecs_to_jiffies(READ_ONCE(line->debounce_period_us))); return IRQ_HANDLED; } @@ -937,7 +1060,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) /* try hardware */ ret = gpiod_set_debounce(line->desc, debounce_period_us); if (!ret) { - WRITE_ONCE(line->desc->debounce_period_us, debounce_period_us); + line_set_debounce_period(line, debounce_period_us); return ret; } if (ret != -ENOTSUPP) @@ -1016,8 +1139,7 @@ static void edge_detector_stop(struct line *line) cancel_delayed_work_sync(&line->work); WRITE_ONCE(line->sw_debounced, 0); WRITE_ONCE(line->edflags, 0); - if (line->desc) - WRITE_ONCE(line->desc->debounce_period_us, 0); + line_set_debounce_period(line, 0); /* do not change line->level - see comment in debounced_value() */ } @@ -1042,7 +1164,7 @@ static int edge_detector_setup(struct line *line, ret = debounce_setup(line, debounce_period_us); if (ret) return ret; - WRITE_ONCE(line->desc->debounce_period_us, debounce_period_us); + line_set_debounce_period(line, debounce_period_us); } /* detection disabled or sw debouncer will provide edge detection */ @@ -1084,12 +1206,12 @@ static int edge_detector_update(struct line *line, gpio_v2_line_config_debounce_period(lc, line_idx); if ((active_edflags == edflags) && - (READ_ONCE(line->desc->debounce_period_us) == debounce_period_us)) + (READ_ONCE(line->debounce_period_us) == debounce_period_us)) return 0; /* sw debounced and still will be...*/ if (debounce_period_us && READ_ONCE(line->sw_debounced)) { - WRITE_ONCE(line->desc->debounce_period_us, debounce_period_us); + line_set_debounce_period(line, debounce_period_us); return 0; } @@ -1566,13 +1688,18 @@ static ssize_t linereq_read(struct file *file, char __user *buf, static void linereq_free(struct linereq *lr) { + struct line *line; unsigned int i; for (i = 0; i < lr->num_lines; i++) { - if (lr->lines[i].desc) { - edge_detector_stop(&lr->lines[i]); - gpiod_free(lr->lines[i].desc); - } + line = &lr->lines[i]; + if (!line->desc) + continue; + + edge_detector_stop(line); + if (line_has_supinfo(line)) + supinfo_erase(line); + gpiod_free(line->desc); } kfifo_free(&lr->events); kfree(lr->label); @@ -2239,8 +2366,6 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc, struct gpio_chip *gc = desc->gdev->chip; bool ok_for_pinctrl; unsigned long flags; - u32 debounce_period_us; - unsigned int num_attrs = 0; memset(info, 0, sizeof(*info)); info->offset = gpio_chip_hwgpio(desc); @@ -2307,14 +2432,6 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc, else if (test_bit(FLAG_EVENT_CLOCK_HTE, &desc->flags)) info->flags |= GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE; - debounce_period_us = READ_ONCE(desc->debounce_period_us); - if (debounce_period_us) { - info->attrs[num_attrs].id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE; - info->attrs[num_attrs].debounce_period_us = debounce_period_us; - num_attrs++; - } - info->num_attrs = num_attrs; - spin_unlock_irqrestore(&gpio_lock, flags); } @@ -2420,6 +2537,7 @@ static int lineinfo_get(struct gpio_chardev_data *cdev, void __user *ip, return -EBUSY; } gpio_desc_to_lineinfo(desc, &lineinfo); + supinfo_to_lineinfo(desc, &lineinfo); if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) { if (watch) @@ -2523,6 +2641,7 @@ static int lineinfo_changed_notify(struct notifier_block *nb, chg.event_type = action; chg.timestamp_ns = ktime_get_ns(); gpio_desc_to_lineinfo(desc, &chg.info); + supinfo_to_lineinfo(desc, &chg.info); ret = kfifo_in_spinlocked(&cdev->events, &chg, 1, &cdev->wait.lock); if (ret) -- GitLab From 1a51e24404d77bb3307c1e39eee0d8e86febb1a5 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Fri, 10 May 2024 14:53:42 +0800 Subject: [PATCH 2178/2290] gpiolib: cdev: fix uninitialised kfifo [ Upstream commit ee0166b637a5e376118e9659e5b4148080f1d27e ] If a line is requested with debounce, and that results in debouncing in software, and the line is subsequently reconfigured to enable edge detection then the allocation of the kfifo to contain edge events is overlooked. This results in events being written to and read from an uninitialised kfifo. Read events are returned to userspace. Initialise the kfifo in the case where the software debounce is already active. Fixes: 65cff7046406 ("gpiolib: cdev: support setting debounce") Signed-off-by: Kent Gibson Link: https://lore.kernel.org/r/20240510065342.36191-1-warthog618@gmail.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-cdev.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 6ee1074d49152..97e8335716b01 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1201,6 +1201,8 @@ static int edge_detector_update(struct line *line, struct gpio_v2_line_config *lc, unsigned int line_idx, u64 edflags) { + u64 eflags; + int ret; u64 active_edflags = READ_ONCE(line->edflags); unsigned int debounce_period_us = gpio_v2_line_config_debounce_period(lc, line_idx); @@ -1212,6 +1214,18 @@ static int edge_detector_update(struct line *line, /* sw debounced and still will be...*/ if (debounce_period_us && READ_ONCE(line->sw_debounced)) { line_set_debounce_period(line, debounce_period_us); + /* + * ensure event fifo is initialised if edge detection + * is now enabled. + */ + eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS; + if (eflags && !kfifo_initialized(&line->req->events)) { + ret = kfifo_alloc(&line->req->events, + line->req->event_buffer_size, + GFP_KERNEL); + if (ret) + return ret; + } return 0; } -- GitLab From 02f5300f6827206f6e48a77f51e6264993695e5c Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Tue, 9 Apr 2024 10:38:58 -0400 Subject: [PATCH 2179/2290] drm/amd/display: Atom Integrated System Info v2_2 for DCN35 [ Upstream commit 9a35d205f466501dcfe5625ca313d944d0ac2d60 ] New request from KMD/VBIOS in order to support new UMA carveout model. This fixes a null dereference from accessing Ctx->dc_bios->integrated_info while it was NULL. DAL parses through the BIOS and extracts the necessary integrated_info but was missing a case for the new BIOS version 2.3. Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Gabe Teeger Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 93e40e0a15087..4d2590964a204 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2962,6 +2962,7 @@ static enum bp_result construct_integrated_info( result = get_integrated_info_v2_1(bp, info); break; case 2: + case 3: result = get_integrated_info_v2_2(bp, info); break; default: -- GitLab From f82f7220af4dbc3da1a6b9f8153e7882bdf1d91b Mon Sep 17 00:00:00 2001 From: Leah Rumancik Date: Thu, 9 May 2024 13:17:35 -0700 Subject: [PATCH 2180/2290] MAINTAINERS: add leah to 6.1 MAINTAINERS file I've been trying to get backports rolling to 6.1.y. Update MAINTAINERS file so backports requests / questions can get routed appropriately. Signed-off-by: Leah Rumancik Reviewed-by: Darrick J. Wong Signed-off-by: Sasha Levin --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ecf4d0c8f446e..4b19dfb5d2fd4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22557,6 +22557,7 @@ F: include/xen/swiotlb-xen.h XFS FILESYSTEM C: irc://irc.oftc.net/xfs +M: Leah Rumancik M: Darrick J. Wong L: linux-xfs@vger.kernel.org S: Supported -- GitLab From 5c25b169f9a0b34ee410891a96bc9d7b9ed6f9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 21 Mar 2024 11:32:02 +0100 Subject: [PATCH 2181/2290] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d3a9331a6591e9df64791e076f6591f440af51c3 upstream. This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap. The basic problem here is that after the move the old location is simply not available any more. Some fixes were suggested, but essentially we should call the move notification before actually moving things because only this way we have the correct order for DMA-buf and VM move notifications as well. Also rework the statistic handling so that we don't update the eviction counter before the move. v2: add missing NULL check Signed-off-by: Christian König Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171 Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 48 ++++++++++++---------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cde2fd2f71171..9a111988b7f15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1222,14 +1222,18 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new resource for backing the BO * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_resource *old_mem = bo->resource; struct amdgpu_bo *abo; if (!amdgpu_bo_is_amdgpu_bo(bo)) @@ -1241,12 +1245,12 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) amdgpu_bo_kunmap(abo); if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && - bo->resource->mem_type != TTM_PL_SYSTEM) + old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); - /* remember the eviction */ - if (evict) - atomic64_inc(&adev->num_evictions); + /* move_notify is called before move happens */ + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, + old_mem ? old_mem->mem_type : -1); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 2ada421e79e4f..6dcd7bab42fbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -312,7 +312,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dfb9d42007730..7afefaa374276 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -483,14 +483,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == TTM_PL_SYSTEM && (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if ((old_mem->mem_type == TTM_PL_TT || old_mem->mem_type == AMDGPU_PL_PREEMPT) && @@ -500,9 +502,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == AMDGPU_PL_GDS || @@ -512,8 +515,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, new_mem->mem_type == AMDGPU_PL_GWS || new_mem->mem_type == AMDGPU_PL_OA) { /* Nothing to save here */ + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (bo->type == ttm_bo_type_device && @@ -525,22 +529,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; } - if (adev->mman.buffer_funcs_enabled) { - if (((old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_VRAM) || - (old_mem->mem_type == TTM_PL_VRAM && - new_mem->mem_type == TTM_PL_SYSTEM))) { - hop->fpfn = 0; - hop->lpfn = 0; - hop->mem_type = TTM_PL_TT; - hop->flags = TTM_PL_FLAG_TEMPORARY; - return -EMULTIHOP; - } + if (adev->mman.buffer_funcs_enabled && + ((old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_VRAM) || + (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = TTM_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + return -EMULTIHOP; + } + amdgpu_bo_move_notify(bo, evict, new_mem); + if (adev->mman.buffer_funcs_enabled) r = amdgpu_move_blit(bo, evict, new_mem, old_mem); - } else { + else r = -ENODEV; - } if (r) { /* Check that all memory is CPU accessible */ @@ -555,11 +560,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); -out: - /* update statistics */ + /* update statistics after the move */ + if (evict) + atomic64_inc(&adev->num_evictions); atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict); return 0; } @@ -1505,7 +1509,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false); + amdgpu_bo_move_notify(bo, false, NULL); } static struct ttm_device_funcs amdgpu_bo_driver = { -- GitLab From 53f2bfce46d8fd5c9d14d0067ea73144da55748b Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 21 Dec 2023 11:47:45 +0300 Subject: [PATCH 2182/2290] btrfs: fix kvcalloc() arguments order in btrfs_ioctl_send() commit 6ff09b6b8c2fb6b3edda4ffaa173153a40653067 upstream. When compiling with gcc version 14.0.0 20231220 (experimental) and W=1, I've noticed the following warning: fs/btrfs/send.c: In function 'btrfs_ioctl_send': fs/btrfs/send.c:8208:44: warning: 'kvcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Wcalloc-transposed-args] 8208 | sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), | ^ Since 'n' and 'size' arguments of 'kvcalloc()' are multiplied to calculate the final size, their actual order doesn't affect the result and so this is not a bug. But it's still worth to fix it. Signed-off-by: Dmitry Antipov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 754a9fb0165fa..ec3db315f5618 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7955,8 +7955,8 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) sctx->rbtree_new_refs = RB_ROOT; sctx->rbtree_deleted_refs = RB_ROOT; - sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), - arg->clone_sources_count + 1, + sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1, + sizeof(*sctx->clone_roots), GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; -- GitLab From 4ee0941da10e8fdcdb34756b877efd3282594c1f Mon Sep 17 00:00:00 2001 From: Thanassis Avgerinos Date: Wed, 17 Apr 2024 11:30:02 -0400 Subject: [PATCH 2183/2290] firewire: nosy: ensure user_length is taken into account when fetching packet contents commit 38762a0763c10c24a4915feee722d7aa6e73eb98 upstream. Ensure that packet_buffer_get respects the user_length provided. If the length of the head packet exceeds the user_length, packet_buffer_get will now return 0 to signify to the user that no data were read and a larger buffer size is required. Helps prevent user space overflows. Signed-off-by: Thanassis Avgerinos Signed-off-by: Takashi Sakamoto Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/nosy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c index b0d671db178a8..ea31ac7ac1ca9 100644 --- a/drivers/firewire/nosy.c +++ b/drivers/firewire/nosy.c @@ -148,10 +148,12 @@ packet_buffer_get(struct client *client, char __user *data, size_t user_length) if (atomic_read(&buffer->size) == 0) return -ENODEV; - /* FIXME: Check length <= user_length. */ + length = buffer->head->length; + + if (length > user_length) + return 0; end = buffer->data + buffer->capacity; - length = buffer->head->length; if (&buffer->head->data[length] < end) { if (copy_to_user(data, buffer->head->data, length)) -- GitLab From b548c53bc3ab83dc6fc86c8e840f013b2032267a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 May 2024 13:28:59 -0700 Subject: [PATCH 2184/2290] Reapply "drm/qxl: simplify qxl_fence_wait" commit 3628e0383dd349f02f882e612ab6184e4bb3dc10 upstream. This reverts commit 07ed11afb68d94eadd4ffc082b97c2331307c5ea. Stephen Rostedt reports: "I went to run my tests on my VMs and the tests hung on boot up. Unfortunately, the most I ever got out was: [ 93.607888] Testing event system initcall: OK [ 93.667730] Running tests on all trace events: [ 93.669757] Testing all events: OK [ 95.631064] ------------[ cut here ]------------ Timed out after 60 seconds" and further debugging points to a possible circular locking dependency between the console_owner locking and the worker pool locking. Reverting the commit allows Steve's VM to boot to completion again. [ This may obviously result in the "[TTM] Buffer eviction failed" messages again, which was the reason for that original revert. But at this point this seems preferable to a non-booting system... ] Reported-and-bisected-by: Steven Rostedt Link: https://lore.kernel.org/all/20240502081641.457aa25f@gandalf.local.home/ Acked-by: Maxime Ripard Cc: Alex Constantino Cc: Maxime Ripard Cc: Timo Lindfors Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Maarten Lankhorst Cc: Thomas Zimmermann Cc: Daniel Vetter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_release.c | 50 ++++--------------------------- include/linux/dma-fence.h | 7 ----- 2 files changed, 5 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 9febc8b73f09e..368d26da0d6a2 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -58,56 +58,16 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct qxl_device *qdev; - struct qxl_release *release; - int count = 0, sc = 0; - bool have_drawable_releases; unsigned long cur, end = jiffies + timeout; qdev = container_of(fence->lock, struct qxl_device, release_lock); - release = container_of(fence, struct qxl_release, base); - have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE; - -retry: - sc++; - - if (dma_fence_is_signaled(fence)) - goto signaled; - - qxl_io_notify_oom(qdev); - - for (count = 0; count < 11; count++) { - if (!qxl_queue_garbage_collect(qdev, true)) - break; - - if (dma_fence_is_signaled(fence)) - goto signaled; - } - - if (dma_fence_is_signaled(fence)) - goto signaled; - if (have_drawable_releases || sc < 4) { - if (sc > 2) - /* back off */ - usleep_range(500, 1000); - - if (time_after(jiffies, end)) - return 0; - - if (have_drawable_releases && sc > 300) { - DMA_FENCE_WARN(fence, - "failed to wait on release %llu after spincount %d\n", - fence->context & ~0xf0000000, sc); - goto signaled; - } - goto retry; - } - /* - * yeah, original sync_obj_wait gave up after 3 spins when - * have_drawable_releases is not set. - */ + if (!wait_event_timeout(qdev->release_event, + (dma_fence_is_signaled(fence) || + (qxl_io_notify_oom(qdev), 0)), + timeout)) + return 0; -signaled: cur = jiffies; if (time_after(cur, end)) return 0; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 5d6a5f3097cd0..b79097b9070b3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -659,11 +659,4 @@ static inline bool dma_fence_is_container(struct dma_fence *fence) return dma_fence_is_array(fence) || dma_fence_is_chain(fence); } -#define DMA_FENCE_WARN(f, fmt, args...) \ - do { \ - struct dma_fence *__ff = (f); \ - pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ - ##args); \ - } while (0) - #endif /* __LINUX_DMA_FENCE_H */ -- GitLab From 8762bf944a4b5b42e7fae3825988df12150cd467 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Mon, 3 Apr 2023 18:48:10 +0900 Subject: [PATCH 2185/2290] rust: error: Rename to_kernel_errno() -> to_errno() commit 46384d0990bf99ed8b597e8794ea581e2a647710 upstream. This is kernel code, so specifying "kernel" is redundant. Let's simplify things and just call it to_errno(). Reviewed-by: Gary Guo Reviewed-by: Martin Rodriguez Reboredo Signed-off-by: Asahi Lina Link: https://lore.kernel.org/r/20230224-rust-error-v3-1-03779bddc02b@asahilina.net Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/kernel/error.rs | 2 +- rust/macros/module.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 466b2a8fe569b..4a3a6306cfe19 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -25,7 +25,7 @@ pub struct Error(core::ffi::c_int); impl Error { /// Returns the kernel error code. - pub fn to_kernel_errno(self) -> core::ffi::c_int { + pub fn to_errno(self) -> core::ffi::c_int { self.0 } } diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 186a5b8be23cd..49f7ce31c4cfa 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -258,7 +258,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { return 0; }} Err(e) => {{ - return e.to_kernel_errno(); + return e.to_errno(); }} }} }} -- GitLab From 0a0464cd41512c5566fce822524fb447b7e6d737 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 10 Feb 2023 16:26:22 +0100 Subject: [PATCH 2186/2290] rust: fix regexp in scripts/is_rust_module.sh commit ccc4505454db10402d5284f22d8b7db62e636fc5 upstream. nm can use "R" or "r" to show read-only data sections, but scripts/is_rust_module.sh can only recognize "r", so with some versions of binutils it can fail to detect if a module is a Rust module or not. Right now we're using this script only to determine if we need to skip BTF generation (that is disabled globally if CONFIG_RUST is enabled), but it's still nice to fix this script to do the proper job. Moreover, with this patch applied I can also relax the constraint of "RUST depends on !DEBUG_INFO_BTF" and build a kernel with Rust and BTF enabled at the same time (of course BTF generation is still skipped for Rust modules). [ Miguel: The actual reason is likely to be a change on the Rust compiler between 1.61.0 and 1.62.0: echo '#[used] static S: () = ();' | rustup run 1.61.0 rustc --emit=obj --crate-type=lib - && nm rust_out.o echo '#[used] static S: () = ();' | rustup run 1.62.0 rustc --emit=obj --crate-type=lib - && nm rust_out.o Gives: 0000000000000000 r _ZN8rust_out1S17h48027ce0da975467E 0000000000000000 R _ZN8rust_out1S17h58e1f3d9c0e97cefE See https://godbolt.org/z/KE6jneoo4. ] Signed-off-by: Andrea Righi Reviewed-by: Vincenzo Palazzo Reviewed-by: Eric Curtin Reviewed-by: Martin Rodriguez Reboredo Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- scripts/is_rust_module.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/is_rust_module.sh b/scripts/is_rust_module.sh index 28b3831a7593f..464761a7cf7f2 100755 --- a/scripts/is_rust_module.sh +++ b/scripts/is_rust_module.sh @@ -13,4 +13,4 @@ set -e # # In the future, checking for the `.comment` section may be another # option, see https://github.com/rust-lang/rust/pull/97550. -${NM} "$*" | grep -qE '^[0-9a-fA-F]+ r _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$' +${NM} "$*" | grep -qE '^[0-9a-fA-F]+ [Rr] _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$' -- GitLab From 15eb8edb09bb73dbbfaa7f491d77d669f2470318 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 4 Jul 2023 07:21:36 +0200 Subject: [PATCH 2187/2290] btf, scripts: rust: drop is_rust_module.sh commit 41bdc6decda074afc4d8f8ba44c69b08d0e9aff6 upstream. With commit c1177979af9c ("btf, scripts: Exclude Rust CUs with pahole") we are now able to use pahole directly to identify Rust compilation units (CUs) and exclude them from generating BTF debugging information (when DEBUG_INFO_BTF is enabled). And if pahole doesn't support the --lang-exclude flag, we can't enable both RUST and DEBUG_INFO_BTF at the same time. So, in any case, the script is_rust_module.sh is just redundant and we can drop it. NOTE: we may also be able to drop the "Rust loadable module" mark inside Rust modules, but it seems safer to keep it for now to make sure we are not breaking any external tool that may potentially rely on it. Signed-off-by: Andrea Righi Reviewed-by: Nathan Chancellor Tested-by: Eric Curtin Reviewed-by: Eric Curtin Reviewed-by: Neal Gompa Reviewed-by: Masahiro Yamada Reviewed-by: Martin Rodriguez Reboredo Acked-by: Daniel Xu Link: https://lore.kernel.org/r/20230704052136.155445-1-andrea.righi@canonical.com [ Picked the `Reviewed-by`s from the old patch too. ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/macros/module.rs | 2 +- scripts/Makefile.modfinal | 2 -- scripts/is_rust_module.sh | 16 ---------------- 3 files changed, 1 insertion(+), 19 deletions(-) delete mode 100755 scripts/is_rust_module.sh diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 49f7ce31c4cfa..ee9674c50a32c 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -179,7 +179,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { /// Used by the printing macros, e.g. [`info!`]. const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; - /// The \"Rust loadable module\" mark, for `scripts/is_rust_module.sh`. + /// The \"Rust loadable module\" mark. // // This may be best done another way later on, e.g. as a new modinfo // key or a new section. For the moment, keep it simple. diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 650d59388336f..0faee3a477057 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -41,8 +41,6 @@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ if [ ! -f vmlinux ]; then \ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ - elif [ -n "$(CONFIG_RUST)" ] && $(srctree)/scripts/is_rust_module.sh $@; then \ - printf "Skipping BTF generation for %s because it's a Rust module\n" $@ 1>&2; \ else \ LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \ $(RESOLVE_BTFIDS) -b vmlinux $@; \ diff --git a/scripts/is_rust_module.sh b/scripts/is_rust_module.sh deleted file mode 100755 index 464761a7cf7f2..0000000000000 --- a/scripts/is_rust_module.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# is_rust_module.sh module.ko -# -# Returns `0` if `module.ko` is a Rust module, `1` otherwise. - -set -e - -# Using the `16_` prefix ensures other symbols with the same substring -# are not picked up (even if it would be unlikely). The last part is -# used just in case LLVM decides to use the `.` suffix. -# -# In the future, checking for the `.comment` section may be another -# option, see https://github.com/rust-lang/rust/pull/97550. -${NM} "$*" | grep -qE '^[0-9a-fA-F]+ [Rr] _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$' -- GitLab From ca99731c13e4f2f8ebc1a7afbd54aad66f1bf175 Mon Sep 17 00:00:00 2001 From: Thomas Bertschinger Date: Tue, 6 Feb 2024 08:38:06 -0700 Subject: [PATCH 2188/2290] rust: module: place generated init_module() function in .init.text commit 1b6170ff7a203a5e8354f19b7839fe8b897a9c0d upstream. Currently Rust kernel modules have their init code placed in the `.text` section of the .ko file. I don't think this causes any real problems for Rust modules as long as all code called during initialization lives in `.text`. However, if a Rust `init_module()` function (that lives in `.text`) calls a function marked with `__init` (in C) or `#[link_section = ".init.text"]` (in Rust), then a warning is generated by modpost because that function lives in `.init.text`. For example: WARNING: modpost: fs/bcachefs/bcachefs: section mismatch in reference: init_module+0x6 (section: .text) -> _RNvXCsj7d3tFpT5JS_15bcachefs_moduleNtB2_8BcachefsNtCsjDtqRIL3JAG_6kernel6Module4init (section: .init.text) I ran into this while experimenting with converting the bcachefs kernel module from C to Rust. The module's `init()`, written in Rust, calls C functions like `bch2_vfs_init()` which are placed in `.init.text`. This patch places the macro-generated `init_module()` Rust function in the `.init.text` section. It also marks `init_module()` as unsafe--now it may not be called after module initialization completes because it may be freed already. Note that this is not enough on its own to actually get all the module initialization code in that section. The module author must still add the `#[link_section = ".init.text"]` attribute to the Rust `init()` in the `impl kernel::Module` block in order to then call `__init` functions. However, this patch enables module authors do so, when previously it would not be possible (without warnings). Signed-off-by: Thomas Bertschinger Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240206153806.567055-1-tahbertschinger@gmail.com [ Reworded title to add prefix. ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/macros/module.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rust/macros/module.rs b/rust/macros/module.rs index ee9674c50a32c..76471161ffa66 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -202,10 +202,15 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { }}; // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. + /// # Safety + /// + /// This function must not be called after module initialization, because it may be + /// freed after that completes. #[cfg(MODULE)] #[doc(hidden)] #[no_mangle] - pub extern \"C\" fn init_module() -> core::ffi::c_int {{ + #[link_section = \".init.text\"] + pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ __init() }} -- GitLab From 398248fc45bf2694a61d4fda9dfd07b8eb3ff96c Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 1 Apr 2024 18:52:50 +0000 Subject: [PATCH 2189/2290] rust: macros: fix soundness issue in `module!` macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7044dcff8301b29269016ebd17df27c4736140d2 upstream. The `module!` macro creates glue code that are called by C to initialize the Rust modules using the `Module::init` function. Part of this glue code are the local functions `__init` and `__exit` that are used to initialize/destroy the Rust module. These functions are safe and also visible to the Rust mod in which the `module!` macro is invoked. This means that they can be called by other safe Rust code. But since they contain `unsafe` blocks that rely on only being called at the right time, this is a soundness issue. Wrap these generated functions inside of two private modules, this guarantees that the public functions cannot be called from the outside. Make the safe functions `unsafe` and add SAFETY comments. Cc: stable@vger.kernel.org Reported-by: Björn Roy Baron Closes: https://github.com/Rust-for-Linux/linux/issues/629 Fixes: 1fbde52bde73 ("rust: add `macros` crate") Signed-off-by: Benno Lossin Reviewed-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20240401185222.12015-1-benno.lossin@proton.me [ Moved `THIS_MODULE` out of the private-in-private modules since it should remain public, as Dirk Behme noticed [1]. Capitalized comments, avoided newline in non-list SAFETY comments and reworded to add Reported-by and newline. ] Link: https://rust-for-linux.zulipchat.com/#narrow/stream/291565-Help/topic/x/near/433512583 [1] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/macros/module.rs | 190 +++++++++++++++++++++++++----------------- 1 file changed, 115 insertions(+), 75 deletions(-) diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 76471161ffa66..031028b3dc41b 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -179,17 +179,6 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { /// Used by the printing macros, e.g. [`info!`]. const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; - /// The \"Rust loadable module\" mark. - // - // This may be best done another way later on, e.g. as a new modinfo - // key or a new section. For the moment, keep it simple. - #[cfg(MODULE)] - #[doc(hidden)] - #[used] - static __IS_RUST_MODULE: () = (); - - static mut __MOD: Option<{type_}> = None; - // SAFETY: `__this_module` is constructed by the kernel at load time and will not be // freed until the module is unloaded. #[cfg(MODULE)] @@ -201,81 +190,132 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; - // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. - /// # Safety - /// - /// This function must not be called after module initialization, because it may be - /// freed after that completes. - #[cfg(MODULE)] - #[doc(hidden)] - #[no_mangle] - #[link_section = \".init.text\"] - pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ - __init() - }} - - #[cfg(MODULE)] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn cleanup_module() {{ - __exit() - }} + // Double nested modules, since then nobody can access the public items inside. + mod __module_init {{ + mod __module_init {{ + use super::super::{type_}; + + /// The \"Rust loadable module\" mark. + // + // This may be best done another way later on, e.g. as a new modinfo + // key or a new section. For the moment, keep it simple. + #[cfg(MODULE)] + #[doc(hidden)] + #[used] + static __IS_RUST_MODULE: () = (); + + static mut __MOD: Option<{type_}> = None; + + // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. + /// # Safety + /// + /// This function must not be called after module initialization, because it may be + /// freed after that completes. + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + #[link_section = \".init.text\"] + pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ + // SAFETY: This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name. + unsafe {{ __init() }} + }} - // Built-in modules are initialized through an initcall pointer - // and the identifiers need to be unique. - #[cfg(not(MODULE))] - #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] - #[doc(hidden)] - #[link_section = \"{initcall_section}\"] - #[used] - pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn cleanup_module() {{ + // SAFETY: + // - This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name, + // - furthermore it is only called after `init_module` has returned `0` + // (which delegates to `__init`). + unsafe {{ __exit() }} + }} - #[cfg(not(MODULE))] - #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] - core::arch::global_asm!( - r#\".section \"{initcall_section}\", \"a\" - __{name}_initcall: - .long __{name}_init - . - .previous - \"# - ); + // Built-in modules are initialized through an initcall pointer + // and the identifiers need to be unique. + #[cfg(not(MODULE))] + #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] + #[doc(hidden)] + #[link_section = \"{initcall_section}\"] + #[used] + pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; + + #[cfg(not(MODULE))] + #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] + core::arch::global_asm!( + r#\".section \"{initcall_section}\", \"a\" + __{name}_initcall: + .long __{name}_init - . + .previous + \"# + ); + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ + // SAFETY: This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // placement above in the initcall section. + unsafe {{ __init() }} + }} - #[cfg(not(MODULE))] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ - __init() - }} + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_exit() {{ + // SAFETY: + // - This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name, + // - furthermore it is only called after `__{name}_init` has returned `0` + // (which delegates to `__init`). + unsafe {{ __exit() }} + }} - #[cfg(not(MODULE))] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn __{name}_exit() {{ - __exit() - }} + /// # Safety + /// + /// This function must only be called once. + unsafe fn __init() -> core::ffi::c_int {{ + match <{type_} as kernel::Module>::init(&super::super::THIS_MODULE) {{ + Ok(m) => {{ + // SAFETY: No data race, since `__MOD` can only be accessed by this + // module and there only `__init` and `__exit` access it. These + // functions are only called once and `__exit` cannot be called + // before or during `__init`. + unsafe {{ + __MOD = Some(m); + }} + return 0; + }} + Err(e) => {{ + return e.to_errno(); + }} + }} + }} - fn __init() -> core::ffi::c_int {{ - match <{type_} as kernel::Module>::init(&THIS_MODULE) {{ - Ok(m) => {{ + /// # Safety + /// + /// This function must + /// - only be called once, + /// - be called after `__init` has been called and returned `0`. + unsafe fn __exit() {{ + // SAFETY: No data race, since `__MOD` can only be accessed by this module + // and there only `__init` and `__exit` access it. These functions are only + // called once and `__init` was already called. unsafe {{ - __MOD = Some(m); + // Invokes `drop()` on `__MOD`, which should be used for cleanup. + __MOD = None; }} - return 0; - }} - Err(e) => {{ - return e.to_errno(); }} - }} - }} - fn __exit() {{ - unsafe {{ - // Invokes `drop()` on `__MOD`, which should be used for cleanup. - __MOD = None; + {modinfo} }} }} - - {modinfo} ", type_ = info.type_, name = info.name, -- GitLab From 91a7af8cb322523259adda97f9b8fd09c649f78f Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Wed, 20 Mar 2024 08:39:23 +0100 Subject: [PATCH 2190/2290] usb: typec: ucsi: Check for notifications after init commit 808a8b9e0b87bbc72bcc1f7ddfe5d04746e7ce56 upstream. The completion notification for the final SET_NOTIFICATION_ENABLE command during initialization can include a connector change notification. However, at the time this completion notification is processed, the ucsi struct is not ready to handle this notification. As a result the notification is ignored and the controller never sends an interrupt again. Re-check CCI for a pending connector state change after initialization is complete. Adjust the corresponding debug message accordingly. Fixes: 71a1fa0df2a3 ("usb: typec: ucsi: Store the notification mask") Cc: stable@vger.kernel.org Signed-off-by: Christian A. Ehrhardt Reviewed-by: Heikki Krogerus Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240320073927.1641788-3-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 98f335cbbcdea..1fd4aaf348047 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -855,7 +855,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num) struct ucsi_connector *con = &ucsi->connector[num - 1]; if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) { - dev_dbg(ucsi->dev, "Bogus connector change event\n"); + dev_dbg(ucsi->dev, "Early connector change event\n"); return; } @@ -1248,6 +1248,7 @@ static int ucsi_init(struct ucsi *ucsi) { struct ucsi_connector *con, *connector; u64 command, ntfy; + u32 cci; int ret; int i; @@ -1300,6 +1301,13 @@ static int ucsi_init(struct ucsi *ucsi) ucsi->connector = connector; ucsi->ntfy = ntfy; + + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret) + return ret; + if (UCSI_CCI_CONNECTOR(READ_ONCE(cci))) + ucsi_connector_change(ucsi, cci); + return 0; err_unregister: -- GitLab From 85e6aa4f6e2b5320b223d5dd4aaae405a13d0c90 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 1 Apr 2024 23:05:15 +0200 Subject: [PATCH 2191/2290] usb: typec: ucsi: Fix connector check on init commit ce4c8d21054ae9396cd759fe6e8157b525616dc4 upstream. Fix issues when initially checking for a connector change: - Use the correct connector number not the entire CCI. - Call ->read under the PPM lock. - Remove a bogus READ_ONCE. Fixes: 808a8b9e0b87 ("usb: typec: ucsi: Check for notifications after init") Cc: stable@kernel.org Signed-off-by: Christian A. Ehrhardt Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240401210515.1902048-1-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 1fd4aaf348047..a163218fdc749 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1302,11 +1302,13 @@ static int ucsi_init(struct ucsi *ucsi) ucsi->connector = connector; ucsi->ntfy = ntfy; + mutex_lock(&ucsi->ppm_lock); ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + mutex_unlock(&ucsi->ppm_lock); if (ret) return ret; - if (UCSI_CCI_CONNECTOR(READ_ONCE(cci))) - ucsi_connector_change(ucsi, cci); + if (UCSI_CCI_CONNECTOR(cci)) + ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); return 0; -- GitLab From 32a22b9f6c42ee29de9c59e163412d51542734b5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 30 Apr 2024 10:33:48 -0400 Subject: [PATCH 2192/2290] usb: Fix regression caused by invalid ep0 maxpacket in virtual SuperSpeed device commit c78c3644b772e356ca452ae733a3c4de0fb11dc8 upstream. A virtual SuperSpeed device in the FreeBSD BVCP package (https://bhyve.npulse.net/) presents an invalid ep0 maxpacket size of 256. It stopped working with Linux following a recent commit because now we check these sizes more carefully than before. Fix this regression by using the bMaxpacketSize0 value in the device descriptor for SuperSpeed or faster devices, even if it is invalid. This is a very simple-minded change; we might want to check more carefully for values that actually make some sense (for instance, no smaller than 64). Signed-off-by: Alan Stern Reported-and-tested-by: Roger Whittaker Closes: https://bugzilla.suse.com/show_bug.cgi?id=1220569 Link: https://lore.kernel.org/linux-usb/9efbd569-7059-4575-983f-0ea30df41871@suse.com/ Fixes: 59cf44575456 ("USB: core: Fix oversight in SuperSpeed initialization") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/4058ac05-237c-4db4-9ecc-5af42bdb4501@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index dea110241ee71..50a5608c204f4 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5069,9 +5069,10 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, } if (usb_endpoint_maxp(&udev->ep0.desc) == i) { ; /* Initial ep0 maxpacket guess is right */ - } else if ((udev->speed == USB_SPEED_FULL || + } else if (((udev->speed == USB_SPEED_FULL || udev->speed == USB_SPEED_HIGH) && - (i == 8 || i == 16 || i == 32 || i == 64)) { + (i == 8 || i == 16 || i == 32 || i == 64)) || + (udev->speed >= USB_SPEED_SUPER && i > 0)) { /* Initial guess is wrong; use the descriptor's value */ if (udev->speed == USB_SPEED_FULL) dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i); -- GitLab From 3e7bbab8bba5a5917e002a9f74d9afb70e8bd287 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 29 Apr 2024 08:40:10 -0700 Subject: [PATCH 2193/2290] usb: ohci: Prevent missed ohci interrupts commit fe81f354841641c7f71163b84912b25c169ed8ec upstream. Testing ohci functionality with qemu's pci-ohci emulation often results in ohci interface stalls, resulting in hung task timeouts. The problem is caused by lost interrupts between the emulation and the Linux kernel code. Additional interrupts raised while the ohci interrupt handler in Linux is running and before the handler clears the interrupt status are not handled. The fix for a similar problem in ehci suggests that the problem is likely caused by edge-triggered MSI interrupts. See commit 0b60557230ad ("usb: ehci: Prevent missed ehci interrupts with edge-triggered MSI") for details. Ensure that the ohci interrupt code handles all pending interrupts before returning to solve the problem. Cc: Gerd Hoffmann Cc: David Laight Cc: stable@vger.kernel.org Fixes: 306c54d0edb6 ("usb: hcd: Try MSI interrupts on PCI devices") Signed-off-by: Guenter Roeck Reviewed-by: Alan Stern Reviewed-by: Gerd Hoffmann Link: https://lore.kernel.org/r/20240429154010.1507366-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 0457dd9f6c19a..ab175e181e55c 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -888,6 +888,7 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) /* Check for an all 1's result which is a typical consequence * of dead, unclocked, or unplugged (CardBus...) devices */ +again: if (ints == ~(u32)0) { ohci->rh_state = OHCI_RH_HALTED; ohci_dbg (ohci, "device removed!\n"); @@ -982,6 +983,13 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) } spin_unlock(&ohci->lock); + /* repeat until all enabled interrupts are handled */ + if (ohci->rh_state != OHCI_RH_HALTED) { + ints = ohci_readl(ohci, ®s->intrstatus); + if (ints && (ints & ohci_readl(ohci, ®s->intrenable))) + goto again; + } + return IRQ_HANDLED; } -- GitLab From 5f1d68ef5ddac27c6b997adccd1c339cef1e6848 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 18 Apr 2024 11:13:13 -0400 Subject: [PATCH 2194/2290] USB: core: Fix access violation during port device removal commit a4b46d450c49f32e9d4247b421e58083fde304ce upstream. Testing with KASAN and syzkaller revealed a bug in port.c:disable_store(): usb_hub_to_struct_hub() can return NULL if the hub that the port belongs to is concurrently removed, but the function does not check for this possibility before dereferencing the returned value. It turns out that the first dereference is unnecessary, since hub->intfdev is the parent of the port device, so it can be changed easily. Adding a check for hub == NULL prevents further problems. The same bug exists in the disable_show() routine, and it can be fixed the same way. Signed-off-by: Alan Stern Reported-and-tested-by: Yue Sun Reported-by: xingwei lee Link: https://lore.kernel.org/linux-usb/CAEkJfYON+ry7xPx=AiLR9jzUNT+i_Va68ACajOC3HoacOfL1ig@mail.gmail.com/ Fixes: f061f43d7418 ("usb: hub: port: add sysfs entry to switch port power") CC: Michael Grzeschik CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/393aa580-15a5-44ca-ad3b-6462461cd313@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 93a63b7f164d1..0007031fad0de 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -23,13 +23,15 @@ static ssize_t disable_show(struct device *dev, struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); - struct usb_interface *intf = to_usb_interface(hub->intfdev); + struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; u16 portstatus, unused; bool disabled; int rc; struct kernfs_node *kn; + if (!hub) + return -ENODEV; hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) @@ -73,12 +75,14 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); - struct usb_interface *intf = to_usb_interface(hub->intfdev); + struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; bool disabled; int rc; struct kernfs_node *kn; + if (!hub) + return -ENODEV; rc = strtobool(buf, &disabled); if (rc) return rc; -- GitLab From 9dac7678e16be3a4fab30dcddafb67054da8aa93 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Thu, 4 Apr 2024 12:06:35 +0200 Subject: [PATCH 2195/2290] usb: gadget: composite: fix OS descriptors w_value logic commit ec6ce7075ef879b91a8710829016005dc8170f17 upstream. The OS descriptors logic had the high/low byte of w_value inverted, causing the extended properties to not be accessible for interface != 0. >From the Microsoft documentation: https://learn.microsoft.com/en-us/windows-hardware/drivers/usbcon/microsoft-os-1-0-descriptors-specification OS_Desc_CompatID.doc (w_index = 0x4): - wValue: High Byte = InterfaceNumber. InterfaceNumber is set to the number of the interface or function that is associated with the descriptor, typically 0x00. Because a device can have only one extended compat ID descriptor, it should ignore InterfaceNumber, regardless of the value, and simply return the descriptor. Low Byte = 0. PageNumber is used to retrieve descriptors that are larger than 64 KB. The header section is 16 bytes, so PageNumber is set to 0 for this request. We currently do not support >64KB compat ID descriptors, so verify that the low byte is 0. OS_Desc_Ext_Prop.doc (w_index = 0x5): - wValue: High byte = InterfaceNumber. The high byte of wValue is set to the number of the interface or function that is associated with the descriptor. Low byte = PageNumber. The low byte of wValue is used to retrieve descriptors that are larger than 64 KB. The header section is 10 bytes, so PageNumber is set to 0 for this request. We also don't support >64KB extended properties, so verify that the low byte is 0 and use the high byte for the interface number. Fixes: 37a3a533429e ("usb: gadget: OS Feature Descriptors support") Cc: stable Signed-off-by: Peter Korsgaard Link: https://lore.kernel.org/r/20240404100635.3215340-1-peter@korsgaard.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 247cca46cdfae..f10e43a948fd8 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1993,7 +1993,7 @@ unknown: buf[5] = 0x01; switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: - if (w_index != 0x4 || (w_value >> 8)) + if (w_index != 0x4 || (w_value & 0xff)) break; buf[6] = w_index; /* Number of ext compat interfaces */ @@ -2009,9 +2009,9 @@ unknown: } break; case USB_RECIP_INTERFACE: - if (w_index != 0x5 || (w_value >> 8)) + if (w_index != 0x5 || (w_value & 0xff)) break; - interface = w_value & 0xFF; + interface = w_value >> 8; if (interface >= MAX_CONFIG_INTERFACES || !os_desc_cfg->interface[interface]) break; -- GitLab From 4e2ae9ff795fd416ded18bda6015bdb6cc24ed89 Mon Sep 17 00:00:00 2001 From: Chris Wulff Date: Tue, 23 Apr 2024 18:02:15 +0000 Subject: [PATCH 2196/2290] usb: gadget: f_fs: Fix a race condition when processing setup packets. commit 0aea736ddb877b93f6d2dd8cf439840d6b4970a9 upstream. If the USB driver passes a pointer into the TRB buffer for creq, this buffer can be overwritten with the status response as soon as the event is queued. This can make the final check return USB_GADGET_DELAYED_STATUS when it shouldn't. Instead use the stored wLength. Fixes: 4d644abf2569 ("usb: gadget: f_fs: Only return delayed status when len is 0") Cc: stable Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/CO1PR17MB5419BD664264A558B2395E28E1112@CO1PR17MB5419.namprd17.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 3e59055aa5040..b2da74bb107af 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3414,7 +3414,7 @@ static int ffs_func_setup(struct usb_function *f, __ffs_event_add(ffs, FUNCTIONFS_SETUP); spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags); - return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; + return ffs->ev.setup.wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; } static bool ffs_func_req_match(struct usb_function *f, -- GitLab From bf3b0ab6fbaea95fe6c5469c7f96e80d53a53ab9 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 17 Apr 2024 23:14:30 +0000 Subject: [PATCH 2197/2290] usb: xhci-plat: Don't include xhci.h commit 4a237d55446ff67655dc3eed2d4a41997536fc4c upstream. The xhci_plat.h should not need to include the entire xhci.h header. This can cause redefinition in dwc3 if it selectively includes some xHCI definitions. This is a prerequisite change for a fix to disable suspend during initialization for dwc3. Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/310acfa01c957a10d9feaca3f7206269866ba2eb.1713394973.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 1fb149d1fbcea..f3abce238207e 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -8,7 +8,9 @@ #ifndef _XHCI_PLAT_H #define _XHCI_PLAT_H -#include "xhci.h" /* for hcd_to_xhci() */ +struct device; +struct platform_device; +struct usb_hcd; struct xhci_plat_priv { const char *firmware_name; -- GitLab From d9efd3c899ec924f9f8bc6eead68c76c8025c058 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 17 Apr 2024 23:14:36 +0000 Subject: [PATCH 2198/2290] usb: dwc3: core: Prevent phy suspend during init commit 6d735722063a945de56472bdc6bfcb170fd43b86 upstream. GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY should be cleared during initialization. Suspend during initialization can result in undefined behavior due to clock synchronization failure, which often seen as core soft reset timeout. The programming guide recommended these bits to be cleared during initialization for DWC_usb3.0 version 1.94 and above (along with DWC_usb31 and DWC_usb32). The current check in the driver does not account if it's set by default setting from coreConsultant. This is especially the case for DRD when switching mode to ensure the phy clocks are available to change mode. Depending on the platforms/design, some may be affected more than others. This is noted in the DWC_usb3x programming guide under the above registers. Let's just disable them during driver load and mode switching. Restore them when the controller initialization completes. Note that some platforms workaround this issue by disabling phy suspend through "snps,dis_u3_susphy_quirk" and "snps,dis_u2_susphy_quirk" when they should not need to. Cc: stable@vger.kernel.org Fixes: 9ba3aca8fe82 ("usb: dwc3: Disable phy suspend after power-on reset") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/20da4e5a0c4678c9587d3da23f83bdd6d77353e9.1713394973.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 90 +++++++++++++++++---------------------- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/gadget.c | 2 + drivers/usb/dwc3/host.c | 27 ++++++++++++ 4 files changed, 68 insertions(+), 52 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 011a3909f9ad1..3b5482621e5e0 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -104,6 +104,27 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc) return 0; } +void dwc3_enable_susphy(struct dwc3 *dwc, bool enable) +{ + u32 reg; + + reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); + if (enable && !dwc->dis_u3_susphy_quirk) + reg |= DWC3_GUSB3PIPECTL_SUSPHY; + else + reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; + + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg); + + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (enable && !dwc->dis_u2_susphy_quirk) + reg |= DWC3_GUSB2PHYCFG_SUSPHY; + else + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); +} + void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) { u32 reg; @@ -669,11 +690,8 @@ static int dwc3_core_ulpi_init(struct dwc3 *dwc) */ static int dwc3_phy_setup(struct dwc3 *dwc) { - unsigned int hw_mode; u32 reg; - hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); - reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); /* @@ -683,21 +701,16 @@ static int dwc3_phy_setup(struct dwc3 *dwc) reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; /* - * Above 1.94a, it is recommended to set DWC3_GUSB3PIPECTL_SUSPHY - * to '0' during coreConsultant configuration. So default value - * will be '0' when the core is reset. Application needs to set it - * to '1' after the core initialization is completed. - */ - if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) - reg |= DWC3_GUSB3PIPECTL_SUSPHY; - - /* - * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be cleared after - * power-on reset, and it can be set after core initialization, which is - * after device soft-reset during initialization. + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration. + * So default value will be '0' when the core is reset. Application + * needs to set it to '1' after the core initialization is completed. + * + * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be + * cleared after power-on reset, and it can be set after core + * initialization. */ - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD) - reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; + reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; if (dwc->u2ss_inp3_quirk) reg |= DWC3_GUSB3PIPECTL_U2SSINP3OK; @@ -723,9 +736,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc) if (dwc->tx_de_emphasis_quirk) reg |= DWC3_GUSB3PIPECTL_TX_DEEPH(dwc->tx_de_emphasis); - if (dwc->dis_u3_susphy_quirk) - reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; - if (dwc->dis_del_phy_power_chg_quirk) reg &= ~DWC3_GUSB3PIPECTL_DEPOCHANGE; @@ -773,24 +783,15 @@ static int dwc3_phy_setup(struct dwc3 *dwc) } /* - * Above 1.94a, it is recommended to set DWC3_GUSB2PHYCFG_SUSPHY to - * '0' during coreConsultant configuration. So default value will - * be '0' when the core is reset. Application needs to set it to - * '1' after the core initialization is completed. - */ - if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) - reg |= DWC3_GUSB2PHYCFG_SUSPHY; - - /* - * For DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared after - * power-on reset, and it can be set after core initialization, which is - * after device soft-reset during initialization. + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration. + * So default value will be '0' when the core is reset. Application + * needs to set it to '1' after the core initialization is completed. + * + * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared + * after power-on reset, and it can be set after core initialization. */ - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD) - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - - if (dwc->dis_u2_susphy_quirk) - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; if (dwc->dis_enblslpm_quirk) reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; @@ -1238,21 +1239,6 @@ static int dwc3_core_init(struct dwc3 *dwc) if (ret) goto err1; - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && - !DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) { - if (!dwc->dis_u3_susphy_quirk) { - reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); - reg |= DWC3_GUSB3PIPECTL_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg); - } - - if (!dwc->dis_u2_susphy_quirk) { - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - reg |= DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - } - dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 889c122dad457..472a6a7e1558a 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1558,6 +1558,7 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc); void dwc3_event_buffers_cleanup(struct dwc3 *dwc); int dwc3_core_soft_reset(struct dwc3 *dwc); +void dwc3_enable_susphy(struct dwc3 *dwc, bool enable); #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_host_init(struct dwc3 *dwc); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b134110cc2ed5..2d7ac92ce9b84 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2831,6 +2831,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); + dwc3_enable_susphy(dwc, true); return 0; @@ -4573,6 +4574,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc) if (!dwc->gadget) return; + dwc3_enable_susphy(dwc, false); usb_del_gadget(dwc->gadget); dwc3_gadget_free_endpoints(dwc); usb_put_gadget(dwc->gadget); diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index f4d8e80c4c347..c0dba453f1b8e 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -10,9 +10,30 @@ #include #include #include +#include +#include +#include "../host/xhci-plat.h" #include "core.h" +static void dwc3_xhci_plat_start(struct usb_hcd *hcd) +{ + struct platform_device *pdev; + struct dwc3 *dwc; + + if (!usb_hcd_is_primary_hcd(hcd)) + return; + + pdev = to_platform_device(hcd->self.controller); + dwc = dev_get_drvdata(pdev->dev.parent); + + dwc3_enable_susphy(dwc, true); +} + +static const struct xhci_plat_priv dwc3_xhci_plat_quirk = { + .plat_start = dwc3_xhci_plat_start, +}; + static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc, int irq, char *name) { @@ -122,6 +143,11 @@ int dwc3_host_init(struct dwc3 *dwc) } } + ret = platform_device_add_data(xhci, &dwc3_xhci_plat_quirk, + sizeof(struct xhci_plat_priv)); + if (ret) + goto err; + ret = platform_device_add(xhci); if (ret) { dev_err(dwc->dev, "failed to register xHCI device\n"); @@ -136,6 +162,7 @@ err: void dwc3_host_exit(struct dwc3 *dwc) { + dwc3_enable_susphy(dwc, false); platform_device_unregister(dwc->xhci); dwc->xhci = NULL; } -- GitLab From cfcd544a9974c6b6fb37ca385146e4796dcaf66d Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Wed, 24 Apr 2024 15:32:16 -0700 Subject: [PATCH 2199/2290] usb: typec: tcpm: unregister existing source caps before re-registration commit 230ecdf71a644c9c73e0e6735b33173074ae3f94 upstream. Check and unregister existing source caps in tcpm_register_source_caps function before registering new ones. This change fixes following warning when port partner resends source caps after negotiating PD contract for the purpose of re-negotiation. [ 343.135030][ T151] sysfs: cannot create duplicate filename '/devices/virtual/usb_power_delivery/pd1/source-capabilities' [ 343.135071][ T151] Call trace: [ 343.135076][ T151] dump_backtrace+0xe8/0x108 [ 343.135099][ T151] show_stack+0x18/0x24 [ 343.135106][ T151] dump_stack_lvl+0x50/0x6c [ 343.135119][ T151] dump_stack+0x18/0x24 [ 343.135126][ T151] sysfs_create_dir_ns+0xe0/0x140 [ 343.135137][ T151] kobject_add_internal+0x228/0x424 [ 343.135146][ T151] kobject_add+0x94/0x10c [ 343.135152][ T151] device_add+0x1b0/0x4c0 [ 343.135187][ T151] device_register+0x20/0x34 [ 343.135195][ T151] usb_power_delivery_register_capabilities+0x90/0x20c [ 343.135209][ T151] tcpm_pd_rx_handler+0x9f0/0x15b8 [ 343.135216][ T151] kthread_worker_fn+0x11c/0x260 [ 343.135227][ T151] kthread+0x114/0x1bc [ 343.135235][ T151] ret_from_fork+0x10/0x20 [ 343.135265][ T151] kobject: kobject_add_internal failed for source-capabilities with -EEXIST, don't try to register things with the same name in the same directory. Fixes: 8203d26905ee ("usb: typec: tcpm: Register USB Power Delivery Capabilities") Cc: linux-usb@vger.kernel.org Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Mark Brown Signed-off-by: Amit Sunil Dhamne Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240424223227.1807844-1-amitsd@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index bf615dc8085e9..5f3950ae36a48 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2416,7 +2416,7 @@ static int tcpm_register_sink_caps(struct tcpm_port *port) { struct usb_power_delivery_desc desc = { port->negotiated_rev }; struct usb_power_delivery_capabilities_desc caps = { }; - struct usb_power_delivery_capabilities *cap; + struct usb_power_delivery_capabilities *cap = port->partner_source_caps; if (!port->partner_pd) port->partner_pd = usb_power_delivery_register(NULL, &desc); @@ -2426,6 +2426,9 @@ static int tcpm_register_sink_caps(struct tcpm_port *port) memcpy(caps.pdo, port->sink_caps, sizeof(u32) * port->nr_sink_caps); caps.role = TYPEC_SINK; + if (cap) + usb_power_delivery_unregister_capabilities(cap); + cap = usb_power_delivery_register_capabilities(port->partner_pd, &caps); if (IS_ERR(cap)) return PTR_ERR(cap); -- GitLab From d56d2ca03cc22123fd7626967d096d8661324e57 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sat, 27 Apr 2024 20:28:12 +0000 Subject: [PATCH 2200/2290] usb: typec: tcpm: Check for port partner validity before consuming it commit ae11f04b452b5205536e1c02d31f8045eba249dd upstream. typec_register_partner() does not guarantee partner registration to always succeed. In the event of failure, port->partner is set to the error value or NULL. Given that port->partner validity is not checked, this results in the following crash: Unable to handle kernel NULL pointer dereference at virtual address xx pc : run_state_machine+0x1bc8/0x1c08 lr : run_state_machine+0x1b90/0x1c08 .. Call trace: run_state_machine+0x1bc8/0x1c08 tcpm_state_machine_work+0x94/0xe4 kthread_worker_fn+0x118/0x328 kthread+0x1d0/0x23c ret_from_fork+0x10/0x20 To prevent the crash, check for port->partner validity before derefencing it in all the call sites. Cc: stable@vger.kernel.org Fixes: c97cd0b4b54e ("usb: typec: tcpm: set initial svdm version based on pd revision") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240427202812.3435268-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5f3950ae36a48..bbcc0e0aa070a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1479,7 +1479,8 @@ static void svdm_consume_identity(struct tcpm_port *port, const u32 *p, int cnt) port->partner_ident.cert_stat = p[VDO_INDEX_CSTAT]; port->partner_ident.product = product; - typec_partner_set_identity(port->partner); + if (port->partner) + typec_partner_set_identity(port->partner); tcpm_log(port, "Identity: %04x:%04x.%04x", PD_IDH_VID(vdo), @@ -1567,6 +1568,9 @@ static void tcpm_register_partner_altmodes(struct tcpm_port *port) struct typec_altmode *altmode; int i; + if (!port->partner) + return; + for (i = 0; i < modep->altmodes; i++) { altmode = typec_partner_register_altmode(port->partner, &modep->altmode_desc[i]); @@ -3638,7 +3642,10 @@ static int tcpm_init_vconn(struct tcpm_port *port) static void tcpm_typec_connect(struct tcpm_port *port) { + struct typec_partner *partner; + if (!port->connected) { + port->connected = true; /* Make sure we don't report stale identity information */ memset(&port->partner_ident, 0, sizeof(port->partner_ident)); port->partner_desc.usb_pd = port->pd_capable; @@ -3648,9 +3655,13 @@ static void tcpm_typec_connect(struct tcpm_port *port) port->partner_desc.accessory = TYPEC_ACCESSORY_AUDIO; else port->partner_desc.accessory = TYPEC_ACCESSORY_NONE; - port->partner = typec_register_partner(port->typec_port, - &port->partner_desc); - port->connected = true; + partner = typec_register_partner(port->typec_port, &port->partner_desc); + if (IS_ERR(partner)) { + dev_err(port->dev, "Failed to register partner (%ld)\n", PTR_ERR(partner)); + return; + } + + port->partner = partner; typec_partner_set_usb_power_delivery(port->partner, port->partner_pd); } } @@ -3720,9 +3731,11 @@ out_disable_mux: static void tcpm_typec_disconnect(struct tcpm_port *port) { if (port->connected) { - typec_partner_set_usb_power_delivery(port->partner, NULL); - typec_unregister_partner(port->partner); - port->partner = NULL; + if (port->partner) { + typec_partner_set_usb_power_delivery(port->partner, NULL); + typec_unregister_partner(port->partner); + port->partner = NULL; + } port->connected = false; } } @@ -3938,6 +3951,9 @@ static enum typec_cc_status tcpm_pwr_opmode_to_rp(enum typec_pwr_opmode opmode) static void tcpm_set_initial_svdm_version(struct tcpm_port *port) { + if (!port->partner) + return; + switch (port->negotiated_rev) { case PD_REV30: break; -- GitLab From f0c6aae9e474ac2d67133ab994386e116479f326 Mon Sep 17 00:00:00 2001 From: Aman Dhoot Date: Mon, 22 Apr 2024 18:08:23 +0530 Subject: [PATCH 2201/2290] ALSA: hda/realtek: Fix mute led of HP Laptop 15-da3001TU commit 2d5af3ab9e6f1cf1468b2a5221b5c1f7f46c3333 upstream. This patch simply add SND_PCI_QUIRK for HP Laptop 15-da3001TU to fixed mute led of laptop. Signed-off-by: Aman Dhoot Cc: Link: https://lore.kernel.org/r/CAMTp=B+3NG65Z684xMwHqdXDJhY+DJK-kuSw4adn6xwnG+b5JA@mail.gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 77c40063d63a7..f0b939862a2a6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9660,6 +9660,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), -- GitLab From 25090e9bb042406a0f9e2ae6a2b9214ca055c80e Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Fri, 19 Apr 2024 11:22:48 +0900 Subject: [PATCH 2202/2290] btrfs: add missing mutex_unlock in btrfs_relocate_sys_chunks() commit 9af503d91298c3f2945e73703f0e00995be08c30 upstream. The previous patch that replaced BUG_ON by error handling forgot to unlock the mutex in the error path. Link: https://lore.kernel.org/all/Zh%2fHpAGFqa7YAFuM@duo.ucw.cz Reported-by: Pavel Machek Fixes: 7411055db5ce ("btrfs: handle chunk tree lookup error in btrfs_relocate_sys_chunks()") CC: stable@vger.kernel.org Reviewed-by: Pavel Machek Signed-off-by: Dominique Martinet Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a92069fbc0287..8c7e74499ed17 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3397,6 +3397,7 @@ again: * alignment and size). */ ret = -EUCLEAN; + mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } -- GitLab From 9f6eb0ab4f95240589ee85fd9886a944cd3645b2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 28 Apr 2024 17:26:44 +0300 Subject: [PATCH 2203/2290] mm/slab: make __free(kfree) accept error pointers commit cd7eb8f83fcf258f71e293f7fc52a70be8ed0128 upstream. Currently, if an automatically freed allocation is an error pointer that will lead to a crash. An example of this is in wm831x_gpio_dbg_show(). 171 char *label __free(kfree) = gpiochip_dup_line_label(chip, i); 172 if (IS_ERR(label)) { 173 dev_err(wm831x->dev, "Failed to duplicate label\n"); 174 continue; 175 } The auto clean up function should check for error pointers as well, otherwise we're going to keep hitting issues like this. Fixes: 54da6a092431 ("locking: Introduce __cleanup() based infrastructure") Cc: Signed-off-by: Dan Carpenter Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index cb4b5deca9a9c..b8e77ffc38929 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -198,7 +198,7 @@ void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); -DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) +DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) /** * ksize - Report actual allocation size of associated object -- GitLab From 39ca83ed73db9edcc6d70c0dc7a73085a4725012 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Apr 2024 20:00:31 +0200 Subject: [PATCH 2204/2290] mptcp: ensure snd_nxt is properly initialized on connect commit fb7a0d334894206ae35f023a82cad5a290fd7386 upstream. Christoph reported a splat hinting at a corrupted snd_una: WARNING: CPU: 1 PID: 38 at net/mptcp/protocol.c:1005 __mptcp_clean_una+0x4b3/0x620 net/mptcp/protocol.c:1005 Modules linked in: CPU: 1 PID: 38 Comm: kworker/1:1 Not tainted 6.9.0-rc1-gbbeac67456c9 #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: events mptcp_worker RIP: 0010:__mptcp_clean_una+0x4b3/0x620 net/mptcp/protocol.c:1005 Code: be 06 01 00 00 bf 06 01 00 00 e8 a8 12 e7 fe e9 00 fe ff ff e8 8e 1a e7 fe 0f b7 ab 3e 02 00 00 e9 d3 fd ff ff e8 7d 1a e7 fe <0f> 0b 4c 8b bb e0 05 00 00 e9 74 fc ff ff e8 6a 1a e7 fe 0f 0b e9 RSP: 0018:ffffc9000013fd48 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8881029bd280 RCX: ffffffff82382fe4 RDX: ffff8881003cbd00 RSI: ffffffff823833c3 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888138ba8000 R13: 0000000000000106 R14: ffff8881029bd908 R15: ffff888126560000 FS: 0000000000000000(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f604a5dae38 CR3: 0000000101dac002 CR4: 0000000000170ef0 Call Trace: __mptcp_clean_una_wakeup net/mptcp/protocol.c:1055 [inline] mptcp_clean_una_wakeup net/mptcp/protocol.c:1062 [inline] __mptcp_retrans+0x7f/0x7e0 net/mptcp/protocol.c:2615 mptcp_worker+0x434/0x740 net/mptcp/protocol.c:2767 process_one_work+0x1e0/0x560 kernel/workqueue.c:3254 process_scheduled_works kernel/workqueue.c:3335 [inline] worker_thread+0x3c7/0x640 kernel/workqueue.c:3416 kthread+0x121/0x170 kernel/kthread.c:388 ret_from_fork+0x44/0x50 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 When fallback to TCP happens early on a client socket, snd_nxt is not yet initialized and any incoming ack will copy such value into snd_una. If the mptcp worker (dumbly) tries mptcp-level re-injection after such ack, that would unconditionally trigger a send buffer cleanup using 'bad' snd_una values. We could easily disable re-injection for fallback sockets, but such dumb behavior already helped catching a few subtle issues and a very low to zero impact in practice. Instead address the issue always initializing snd_nxt (and write_seq, for consistency) at connect time. Fixes: 8fd738049ac3 ("mptcp: fallback in case of simultaneous connect") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/485 Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240429-upstream-net-20240429-mptcp-snd_nxt-init-connect-v1-1-59ceac0a7dcb@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c652c8ca765c2..b6815610a6fa1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3754,6 +3754,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); -- GitLab From 4b71dbe4827f9b67e6265d612bbc4a023f0a96fc Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sat, 16 Mar 2024 23:56:57 +0100 Subject: [PATCH 2205/2290] dt-bindings: iio: health: maxim,max30102: fix compatible check commit 89384a2b656b9dace4c965432a209d5c9c3a2a6f upstream. The "maxim,green-led-current-microamp" property is only available for the max30105 part (it provides an extra green LED), and must be set to false for the max30102 part. Instead, the max30100 part has been used for that, which is not supported by this binding (it has its own binding). This error was introduced during the txt to yaml conversion. Fixes: 5a6a65b11e3a ("dt-bindings:iio:health:maxim,max30102: txt to yaml conversion") Signed-off-by: Javier Carrasco Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240316-max30102_binding_fix-v1-1-e8e58f69ef8a@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/iio/health/maxim,max30102.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml index c13c10c8d65da..eed0df9d3a232 100644 --- a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml +++ b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml @@ -42,7 +42,7 @@ allOf: properties: compatible: contains: - const: maxim,max30100 + const: maxim,max30102 then: properties: maxim,green-led-current-microamp: false -- GitLab From 50fa09df1a92ad756db604968f4fdeb1ff861a45 Mon Sep 17 00:00:00 2001 From: Ramona Gradinariu Date: Fri, 5 Apr 2024 07:53:09 +0300 Subject: [PATCH 2206/2290] iio:imu: adis16475: Fix sync mode setting commit 74a72baf204fd509bbe8b53eec35e39869d94341 upstream. Fix sync mode setting by applying the necessary shift bits. Fixes: fff7352bf7a3 ("iio: imu: Add support for adis16475") Signed-off-by: Ramona Gradinariu Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240405045309.816328-2-ramona.bolboaca13@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16475.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c index 2d939773445d7..e2931ea8af1f4 100644 --- a/drivers/iio/imu/adis16475.c +++ b/drivers/iio/imu/adis16475.c @@ -1126,6 +1126,7 @@ static int adis16475_config_sync_mode(struct adis16475 *st) struct device *dev = &st->adis.spi->dev; const struct adis16475_sync *sync; u32 sync_mode; + u16 val; /* default to internal clk */ st->clk_freq = st->info->int_clk * 1000; @@ -1187,8 +1188,9 @@ static int adis16475_config_sync_mode(struct adis16475 *st) * I'm keeping this for simplicity and avoiding extra variables * in chip_info. */ + val = ADIS16475_SYNC_MODE(sync->sync_mode); ret = __adis_update_bits(&st->adis, ADIS16475_REG_MSG_CTRL, - ADIS16475_SYNC_MODE_MASK, sync->sync_mode); + ADIS16475_SYNC_MODE_MASK, val); if (ret) return ret; -- GitLab From b56d4991cfd9f436e1bad3e56e5165c227656f31 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 26 Mar 2024 12:36:59 +0100 Subject: [PATCH 2207/2290] iio: accel: mxc4005: Interrupt handling fixes commit 57a1592784d622ecee0b71940c65429173996b33 upstream. There are 2 issues with interrupt handling in the mxc4005 driver: 1. mxc4005_set_trigger_state() writes MXC4005_REG_INT_MASK1_BIT_DRDYE (0x01) to INT_MASK1 to enable the interrupt, but to disable the interrupt it writes ~MXC4005_REG_INT_MASK1_BIT_DRDYE which is 0xfe, so it enables all other interrupt sources in the INT_SRC1 register. On the MXC4005 this is not an issue because only bit 0 of the register is used. On the MXC6655 OTOH this is a problem since bit7 is used as TC (Temperature Compensation) disable bit and writing 1 to this disables Temperature Compensation which should only be done when running self-tests on the chip. Write 0 instead of ~MXC4005_REG_INT_MASK1_BIT_DRDYE to disable the interrupts to fix this. 2. The datasheets for the MXC4005 / MXC6655 do not state what the reset value for the INT_MASK0 and INT_MASK1 registers is and since these are write only we also cannot learn this from the hw. Presumably the reset value for both is all 0, which means all interrupts disabled. Explicitly set both registers to 0 from mxc4005_chip_init() to ensure both masks are actually set to 0. Fixes: 79846e33aac1 ("iio: accel: mxc4005: add support for mxc6655") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240326113700.56725-2-hdegoede@redhat.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/mxc4005.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index df600d2917c0a..ffae30e5eb5be 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -27,9 +27,13 @@ #define MXC4005_REG_ZOUT_UPPER 0x07 #define MXC4005_REG_ZOUT_LOWER 0x08 +#define MXC4005_REG_INT_MASK0 0x0A + #define MXC4005_REG_INT_MASK1 0x0B #define MXC4005_REG_INT_MASK1_BIT_DRDYE 0x01 +#define MXC4005_REG_INT_CLR0 0x00 + #define MXC4005_REG_INT_CLR1 0x01 #define MXC4005_REG_INT_CLR1_BIT_DRDYC 0x01 @@ -113,7 +117,9 @@ static bool mxc4005_is_readable_reg(struct device *dev, unsigned int reg) static bool mxc4005_is_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { + case MXC4005_REG_INT_CLR0: case MXC4005_REG_INT_CLR1: + case MXC4005_REG_INT_MASK0: case MXC4005_REG_INT_MASK1: case MXC4005_REG_CONTROL: return true; @@ -330,17 +336,13 @@ static int mxc4005_set_trigger_state(struct iio_trigger *trig, { struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); struct mxc4005_data *data = iio_priv(indio_dev); + unsigned int val; int ret; mutex_lock(&data->mutex); - if (state) { - ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, - MXC4005_REG_INT_MASK1_BIT_DRDYE); - } else { - ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, - ~MXC4005_REG_INT_MASK1_BIT_DRDYE); - } + val = state ? MXC4005_REG_INT_MASK1_BIT_DRDYE : 0; + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, val); if (ret < 0) { mutex_unlock(&data->mutex); dev_err(data->dev, "failed to update reg_int_mask1"); @@ -382,6 +384,14 @@ static int mxc4005_chip_init(struct mxc4005_data *data) dev_dbg(data->dev, "MXC4005 chip id %02x\n", reg); + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0); + if (ret < 0) + return dev_err_probe(data->dev, ret, "writing INT_MASK0\n"); + + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, 0); + if (ret < 0) + return dev_err_probe(data->dev, ret, "writing INT_MASK1\n"); + return 0; } -- GitLab From 50a436d05fdc467ec7d43e9f2582c99ed9e4d613 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 26 Apr 2024 11:16:22 +0200 Subject: [PATCH 2208/2290] kmsan: compiler_types: declare __no_sanitize_or_inline commit 90d1f14cbb9ddbfc532e2da13bf6e0ed8320e792 upstream. It turned out that KMSAN instruments READ_ONCE_NOCHECK(), resulting in false positive reports, because __no_sanitize_or_inline enforced inlining. Properly declare __no_sanitize_or_inline under __SANITIZE_MEMORY__, so that it does not __always_inline the annotated function. Link: https://lkml.kernel.org/r/20240426091622.3846771-1-glider@google.com Fixes: 5de0ce85f5a4 ("kmsan: mark noinstr as __no_sanitize_memory") Signed-off-by: Alexander Potapenko Reported-by: syzbot+355c5bb8c1445c871ee8@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/000000000000826ac1061675b0e3@google.com Cc: Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Miguel Ojeda Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 574b4121ebe3e..8f50c589ad5f4 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -226,6 +226,17 @@ struct ftrace_likely_data { # define __no_kcsan #endif +#ifdef __SANITIZE_MEMORY__ +/* + * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined + * functions, therefore disabling KMSAN checks also requires disabling inlining. + * + * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors + * within the function and marks all its outputs as initialized. + */ +# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused +#endif + #ifndef __no_sanitize_or_inline #define __no_sanitize_or_inline __always_inline #endif -- GitLab From 21ea04aad8a0839b4ec27ef1691ca480620e8e14 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Apr 2024 15:53:37 +0200 Subject: [PATCH 2209/2290] tipc: fix UAF in error path commit 080cbb890286cd794f1ee788bbc5463e2deb7c2b upstream. Sam Page (sam4k) working with Trend Micro Zero Day Initiative reported a UAF in the tipc_buf_append() error path: BUG: KASAN: slab-use-after-free in kfree_skb_list_reason+0x47e/0x4c0 linux/net/core/skbuff.c:1183 Read of size 8 at addr ffff88804d2a7c80 by task poc/8034 CPU: 1 PID: 8034 Comm: poc Not tainted 6.8.2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 Call Trace: __dump_stack linux/lib/dump_stack.c:88 dump_stack_lvl+0xd9/0x1b0 linux/lib/dump_stack.c:106 print_address_description linux/mm/kasan/report.c:377 print_report+0xc4/0x620 linux/mm/kasan/report.c:488 kasan_report+0xda/0x110 linux/mm/kasan/report.c:601 kfree_skb_list_reason+0x47e/0x4c0 linux/net/core/skbuff.c:1183 skb_release_data+0x5af/0x880 linux/net/core/skbuff.c:1026 skb_release_all linux/net/core/skbuff.c:1094 __kfree_skb linux/net/core/skbuff.c:1108 kfree_skb_reason+0x12d/0x210 linux/net/core/skbuff.c:1144 kfree_skb linux/./include/linux/skbuff.h:1244 tipc_buf_append+0x425/0xb50 linux/net/tipc/msg.c:186 tipc_link_input+0x224/0x7c0 linux/net/tipc/link.c:1324 tipc_link_rcv+0x76e/0x2d70 linux/net/tipc/link.c:1824 tipc_rcv+0x45f/0x10f0 linux/net/tipc/node.c:2159 tipc_udp_recv+0x73b/0x8f0 linux/net/tipc/udp_media.c:390 udp_queue_rcv_one_skb+0xad2/0x1850 linux/net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x131/0xb00 linux/net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x165/0x3b0 linux/net/ipv4/udp.c:2346 __udp4_lib_rcv+0x2594/0x3400 linux/net/ipv4/udp.c:2422 ip_protocol_deliver_rcu+0x30c/0x4e0 linux/net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2e4/0x520 linux/net/ipv4/ip_input.c:233 NF_HOOK linux/./include/linux/netfilter.h:314 NF_HOOK linux/./include/linux/netfilter.h:308 ip_local_deliver+0x18e/0x1f0 linux/net/ipv4/ip_input.c:254 dst_input linux/./include/net/dst.h:461 ip_rcv_finish linux/net/ipv4/ip_input.c:449 NF_HOOK linux/./include/linux/netfilter.h:314 NF_HOOK linux/./include/linux/netfilter.h:308 ip_rcv+0x2c5/0x5d0 linux/net/ipv4/ip_input.c:569 __netif_receive_skb_one_core+0x199/0x1e0 linux/net/core/dev.c:5534 __netif_receive_skb+0x1f/0x1c0 linux/net/core/dev.c:5648 process_backlog+0x101/0x6b0 linux/net/core/dev.c:5976 __napi_poll.constprop.0+0xba/0x550 linux/net/core/dev.c:6576 napi_poll linux/net/core/dev.c:6645 net_rx_action+0x95a/0xe90 linux/net/core/dev.c:6781 __do_softirq+0x21f/0x8e7 linux/kernel/softirq.c:553 do_softirq linux/kernel/softirq.c:454 do_softirq+0xb2/0xf0 linux/kernel/softirq.c:441 __local_bh_enable_ip+0x100/0x120 linux/kernel/softirq.c:381 local_bh_enable linux/./include/linux/bottom_half.h:33 rcu_read_unlock_bh linux/./include/linux/rcupdate.h:851 __dev_queue_xmit+0x871/0x3ee0 linux/net/core/dev.c:4378 dev_queue_xmit linux/./include/linux/netdevice.h:3169 neigh_hh_output linux/./include/net/neighbour.h:526 neigh_output linux/./include/net/neighbour.h:540 ip_finish_output2+0x169f/0x2550 linux/net/ipv4/ip_output.c:235 __ip_finish_output linux/net/ipv4/ip_output.c:313 __ip_finish_output+0x49e/0x950 linux/net/ipv4/ip_output.c:295 ip_finish_output+0x31/0x310 linux/net/ipv4/ip_output.c:323 NF_HOOK_COND linux/./include/linux/netfilter.h:303 ip_output+0x13b/0x2a0 linux/net/ipv4/ip_output.c:433 dst_output linux/./include/net/dst.h:451 ip_local_out linux/net/ipv4/ip_output.c:129 ip_send_skb+0x3e5/0x560 linux/net/ipv4/ip_output.c:1492 udp_send_skb+0x73f/0x1530 linux/net/ipv4/udp.c:963 udp_sendmsg+0x1a36/0x2b40 linux/net/ipv4/udp.c:1250 inet_sendmsg+0x105/0x140 linux/net/ipv4/af_inet.c:850 sock_sendmsg_nosec linux/net/socket.c:730 __sock_sendmsg linux/net/socket.c:745 __sys_sendto+0x42c/0x4e0 linux/net/socket.c:2191 __do_sys_sendto linux/net/socket.c:2203 __se_sys_sendto linux/net/socket.c:2199 __x64_sys_sendto+0xe0/0x1c0 linux/net/socket.c:2199 do_syscall_x64 linux/arch/x86/entry/common.c:52 do_syscall_64+0xd8/0x270 linux/arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6f/0x77 linux/arch/x86/entry/entry_64.S:120 RIP: 0033:0x7f3434974f29 Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 37 8f 0d 00 f7 d8 64 89 01 48 RSP: 002b:00007fff9154f2b8 EFLAGS: 00000212 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3434974f29 RDX: 00000000000032c8 RSI: 00007fff9154f300 RDI: 0000000000000003 RBP: 00007fff915532e0 R08: 00007fff91553360 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000212 R12: 000055ed86d261d0 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 In the critical scenario, either the relevant skb is freed or its ownership is transferred into a frag_lists. In both cases, the cleanup code must not free it again: we need to clear the skb reference earlier. Fixes: 1149557d64c9 ("tipc: eliminate unnecessary linearization of incoming buffers") Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-23852 Acked-by: Xin Long Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/752f1ccf762223d109845365d07f55414058e5a3.1714484273.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tipc/msg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c52ab423082cd..76284fc538ebd 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); -- GitLab From 9c301fd3b0f3dde8d24439f91608410869ff4b02 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Fri, 5 Apr 2024 10:43:06 +0000 Subject: [PATCH 2210/2290] ASoC: tegra: Fix DSPK 16-bit playback commit 2e93a29b48a017c777d4fcbfcc51aba4e6a90d38 upstream. DSPK configuration is wrong for 16-bit playback and this happens because the client config is always fixed at 24-bit in hw_params(). Fix this by updating the client config to 16-bit for the respective playback. Fixes: 327ef6470266 ("ASoC: tegra: Add Tegra186 based DSPK driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Acked-by: Thierry Reding Link: https://msgid.link/r/20240405104306.551036-1-spujar@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/tegra/tegra186_dspk.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c index a74c980ee7753..d5a74e25371d2 100644 --- a/sound/soc/tegra/tegra186_dspk.c +++ b/sound/soc/tegra/tegra186_dspk.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // tegra186_dspk.c - Tegra186 DSPK driver -// -// Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. #include #include @@ -241,14 +240,14 @@ static int tegra186_dspk_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - cif_conf.client_bits = TEGRA_ACIF_BITS_24; - switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_16; + cif_conf.client_bits = TEGRA_ACIF_BITS_16; break; case SNDRV_PCM_FORMAT_S32_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_32; + cif_conf.client_bits = TEGRA_ACIF_BITS_24; break; default: dev_err(dev, "unsupported format!\n"); -- GitLab From 0b47bbc91f1a2a7a4c4c9ff71e81e3bd24fa54a1 Mon Sep 17 00:00:00 2001 From: Joao Paulo Goncalves Date: Wed, 17 Apr 2024 15:41:38 -0300 Subject: [PATCH 2211/2290] ASoC: ti: davinci-mcasp: Fix race condition during probe commit d18ca8635db2f88c17acbdf6412f26d4f6aff414 upstream. When using davinci-mcasp as CPU DAI with simple-card, there are some conditions that cause simple-card to finish registering a sound card before davinci-mcasp finishes registering all sound components. This creates a non-working sound card from userspace with no problem indication apart from not being able to play/record audio on a PCM stream. The issue arises during simultaneous probe execution of both drivers. Specifically, the simple-card driver, awaiting a CPU DAI, proceeds as soon as davinci-mcasp registers its DAI. However, this process can lead to the client mutex lock (client_mutex in soc-core.c) being held or davinci-mcasp being preempted before PCM DMA registration on davinci-mcasp finishes. This situation occurs when the probes of both drivers run concurrently. Below is the code path for this condition. To solve the issue, defer davinci-mcasp CPU DAI registration to the last step in the audio part of it. This way, simple-card CPU DAI parsing will be deferred until all audio components are registered. Fail Code Path: simple-card.c: probe starts simple-card.c: simple_dai_link_of: simple_parse_node(..,cpu,..) returns EPROBE_DEFER, no CPU DAI yet davinci-mcasp.c: probe starts davinci-mcasp.c: devm_snd_soc_register_component() register CPU DAI simple-card.c: probes again, finish CPU DAI parsing and call devm_snd_soc_register_card() simple-card.c: finish probe davinci-mcasp.c: *dma_pcm_platform_register() register PCM DMA davinci-mcasp.c: probe finish Cc: stable@vger.kernel.org Fixes: 9fbd58cf4ab0 ("ASoC: davinci-mcasp: Choose PCM driver based on configured DMA controller") Signed-off-by: Joao Paulo Goncalves Acked-by: Peter Ujfalusi Reviewed-by: Jai Luthra Link: https://lore.kernel.org/r/20240417184138.1104774-1-jpaulo.silvagoncalves@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/ti/davinci-mcasp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index ca5d1bb6ac59e..4edf5b27e136b 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -2416,12 +2416,6 @@ static int davinci_mcasp_probe(struct platform_device *pdev) mcasp_reparent_fck(pdev); - ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, - &davinci_mcasp_dai[mcasp->op_mode], 1); - - if (ret != 0) - goto err; - ret = davinci_mcasp_get_dma_type(mcasp); switch (ret) { case PCM_EDMA: @@ -2448,6 +2442,12 @@ static int davinci_mcasp_probe(struct platform_device *pdev) goto err; } + ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, + &davinci_mcasp_dai[mcasp->op_mode], 1); + + if (ret != 0) + goto err; + no_audio: ret = davinci_mcasp_init_gpiochip(mcasp); if (ret) { -- GitLab From a66c869b17c4c4dcf81d273b02cb0efe88e127ab Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 29 Apr 2024 13:31:11 -0600 Subject: [PATCH 2212/2290] dyndbg: fix old BUG_ON in >control parser commit 00e7d3bea2ce7dac7bee1cf501fb071fd0ea8f6c upstream. Fix a BUG_ON from 2009. Even if it looks "unreachable" (I didn't really look), lets make sure by removing it, doing pr_err and return -EINVAL instead. Cc: stable Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20240429193145.66543-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 009f2ead09c1e..939678ea930e0 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -301,7 +301,11 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) } else { for (end = buf; *end && !isspace(*end); end++) ; - BUG_ON(end == buf); + if (end == buf) { + pr_err("parse err after word:%d=%s\n", nwords, + nwords ? words[nwords - 1] : ""); + return -EINVAL; + } } /* `buf' is start of word, `end' is one past its end */ -- GitLab From eedaabee28099c326cb20e637407f6bc43cdc57a Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Tue, 30 Apr 2024 10:12:38 +0100 Subject: [PATCH 2213/2290] slimbus: qcom-ngd-ctrl: Add timeout for wait operation commit 98241a774db49988f25b7b3657026ce51ccec293 upstream. In current driver qcom_slim_ngd_up_worker() indefinitely waiting for ctrl->qmi_up completion object. This is resulting in workqueue lockup on Kthread. Added wait_for_completion_interruptible_timeout to allow the thread to wait for specific timeout period and bail out instead waiting infinitely. Fixes: a899d324863a ("slimbus: qcom-ngd-ctrl: add Sub System Restart support") Cc: stable@vger.kernel.org Reviewed-by: Konrad Dybcio Signed-off-by: Viken Dadhaniya Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240430091238.35209-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 76c5e446d2433..da8555f3b6ca5 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1376,7 +1376,11 @@ static void qcom_slim_ngd_up_worker(struct work_struct *work) ctrl = container_of(work, struct qcom_slim_ngd_ctrl, ngd_up_work); /* Make sure qmi service is up before continuing */ - wait_for_completion_interruptible(&ctrl->qmi_up); + if (!wait_for_completion_interruptible_timeout(&ctrl->qmi_up, + msecs_to_jiffies(MSEC_PER_SEC))) { + dev_err(ctrl->dev, "QMI wait timeout\n"); + return; + } mutex_lock(&ctrl->ssr_lock); qcom_slim_ngd_enable(ctrl, true); -- GitLab From c42a8c6baa85139a3f138f1a5196df6d82e95770 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 21 Apr 2024 16:56:31 +0300 Subject: [PATCH 2214/2290] mei: me: add lunar lake point M DID commit 4108a30f1097eead0f6bd5d885e6bf093b4d460f upstream. Add Lunar (Point) Lake M device id. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240421135631.223362-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 3390ff5111033..d3c03d4edbeff 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -115,6 +115,8 @@ #define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ #define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ +#define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index a617f64a351dc..a4bdc41284582 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -122,6 +122,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; -- GitLab From 89fffbdf535ce659c1a26b51ad62070566e33b28 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Apr 2024 13:06:39 -0400 Subject: [PATCH 2215/2290] drm/amdkfd: don't allow mapping the MMIO HDP page with large pages commit be4a2a81b6b90d1a47eaeaace4cc8e2cb57b96c7 upstream. We don't get the right offset in that case. The GPU has an unused 4K area of the register BAR space into which you can remap registers. We remap the HDP flush registers into this space to allow userspace (CPU or GPU) to flush the HDP when it updates VRAM. However, on systems with >4K pages, we end up exposing PAGE_SIZE of MMIO space. Fixes: d8e408a82704 ("drm/amdkfd: Expose HDP registers to user space") Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 3f403afd6de83..b0f475d51ae7e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1106,7 +1106,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } offset = dev->adev->rmmio_remap.bus_addr; - if (!offset) { + if (!offset || (PAGE_SIZE > 4096)) { err = -ENOMEM; goto err_unlock; } @@ -2215,7 +2215,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, return -EINVAL; } offset = pdd->dev->adev->rmmio_remap.bus_addr; - if (!offset) { + if (!offset || (PAGE_SIZE > 4096)) { pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n"); return -ENOMEM; } @@ -2886,6 +2886,9 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; + if (PAGE_SIZE > 4096) + return -EINVAL; + address = dev->adev->rmmio_remap.bus_addr; vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | -- GitLab From 0dbfc73670b357456196130551e586345ca48e1b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 25 Apr 2024 15:27:48 -0400 Subject: [PATCH 2216/2290] drm/vmwgfx: Fix invalid reads in fence signaled events commit a37ef7613c00f2d72c8fc08bd83fb6cc76926c8c upstream. Correctly set the length of the drm_event to the size of the structure that's actually used. The length of the drm_event was set to the parent structure instead of to the drm_vmw_event_fence which is supposed to be read. drm_read uses the length parameter to copy the event to the user space thus resuling in oob reads. Signed-off-by: Zack Rusin Fixes: 8b7de6aa8468 ("vmwgfx: Rework fence event action") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-23566 Cc: David Airlie CC: Daniel Vetter Cc: Zack Rusin Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Cc: # v3.4+ Reviewed-by: Maaz Mombasawala Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240425192748.1761522-1-zack.rusin@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 66cc35dc223e7..95344735d00e6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -991,7 +991,7 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv, } event->event.base.type = DRM_VMW_EVENT_FENCE_SIGNALED; - event->event.base.length = sizeof(*event); + event->event.base.length = sizeof(event->event); event->event.user_data = user_data; ret = drm_event_reserve_init(dev, file_priv, &event->base, &event->event.base); -- GitLab From d8cdbd0f6cc12147c873f78445fe8cb1b72a0b80 Mon Sep 17 00:00:00 2001 From: Karthikeyan Ramasubramanian Date: Wed, 21 Feb 2024 18:06:24 -0700 Subject: [PATCH 2217/2290] drm/i915/bios: Fix parsing backlight BDB data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 43b26bdd2ee5cfca80939be910d5b23a50cd7f9d upstream. Starting BDB version 239, hdr_dpcd_refresh_timeout is introduced to backlight BDB data. Commit 700034566d68 ("drm/i915/bios: Define more BDB contents") updated the backlight BDB data accordingly. This broke the parsing of backlight BDB data in VBT for versions 236 - 238 (both inclusive) and hence the backlight controls are not responding on units with the concerned BDB version. backlight_control information has been present in backlight BDB data from at least BDB version 191 onwards, if not before. Hence this patch extracts the backlight_control information for BDB version 191 or newer. Tested on Chromebooks using Jasperlake SoC (reports bdb->version = 236). Tested on Chromebooks using Raptorlake SoC (reports bdb->version = 251). v2: removed checking the block size of the backlight BDB data [vsyrjala: this is completely safe thanks to commit e163cfb4c96d ("drm/i915/bios: Make copies of VBT data blocks")] Fixes: 700034566d68 ("drm/i915/bios: Define more BDB contents") Cc: stable@vger.kernel.org Cc: Jani Nikula Cc: Ville Syrjälä Signed-off-by: Karthikeyan Ramasubramanian Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240221180622.v2.1.I0690aa3e96a83a43b3fc33f50395d334b2981826@changeid Signed-off-by: Ville Syrjälä (cherry picked from commit c286f6a973c66c0d993ecab9f7162c790e7064c8) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_bios.c | 19 ++++--------------- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 5 ----- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 9cc1ef2ca72cc..efbb0cffd3bc9 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1034,22 +1034,11 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI; if (i915->display.vbt.version >= 191) { - size_t exp_size; + const struct lfp_backlight_control_method *method; - if (i915->display.vbt.version >= 236) - exp_size = sizeof(struct bdb_lfp_backlight_data); - else if (i915->display.vbt.version >= 234) - exp_size = EXP_BDB_LFP_BL_DATA_SIZE_REV_234; - else - exp_size = EXP_BDB_LFP_BL_DATA_SIZE_REV_191; - - if (get_blocksize(backlight_data) >= exp_size) { - const struct lfp_backlight_control_method *method; - - method = &backlight_data->backlight_control[panel_type]; - panel->vbt.backlight.type = method->type; - panel->vbt.backlight.controller = method->controller; - } + method = &backlight_data->backlight_control[panel_type]; + panel->vbt.backlight.type = method->type; + panel->vbt.backlight.controller = method->controller; } panel->vbt.backlight.pwm_freq_hz = entry->pwm_freq_hz; diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index a9f44abfc9fc2..b50cd0dcabda9 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -897,11 +897,6 @@ struct lfp_brightness_level { u16 reserved; } __packed; -#define EXP_BDB_LFP_BL_DATA_SIZE_REV_191 \ - offsetof(struct bdb_lfp_backlight_data, brightness_level) -#define EXP_BDB_LFP_BL_DATA_SIZE_REV_234 \ - offsetof(struct bdb_lfp_backlight_data, brightness_precision_bits) - struct bdb_lfp_backlight_data { u8 entry_size; struct lfp_backlight_data_entry data[16]; -- GitLab From ce740545c0c9a144e489a85363d13a9c03dd08e9 Mon Sep 17 00:00:00 2001 From: George Shen Date: Thu, 16 Sep 2021 19:55:39 -0400 Subject: [PATCH 2218/2290] drm/amd/display: Handle Y carry-over in VCP X.Y calculation commit 719564737a9ac3d0b49c314450b56cf6f7d71358 upstream. Theoretically rare corner case where ceil(Y) results in rounding up to an integer. If this happens, the 1 should be carried over to the X value. CC: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c index 80dfaa4d4d81e..eb3a4624f781b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c @@ -393,6 +393,12 @@ void dcn31_hpo_dp_link_enc_set_throttled_vcp_size( x), 25)); + // If y rounds up to integer, carry it over to x. + if (y >> 25) { + x += 1; + y = 0; + } + switch (stream_encoder_inst) { case 0: REG_SET_2(DP_DPHY_SYM32_VC_RATE_CNTL0, 0, -- GitLab From 2d60ff5874aefd006717ca5e22ac1e25eac29c42 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 2 May 2024 10:20:06 -0300 Subject: [PATCH 2219/2290] net: fix out-of-bounds access in ops_init commit a26ff37e624d12e28077e5b24d2b264f62764ad6 upstream. net_alloc_generic is called by net_alloc, which is called without any locking. It reads max_gen_ptrs, which is changed under pernet_ops_rwsem. It is read twice, first to allocate an array, then to set s.len, which is later used to limit the bounds of the array access. It is possible that the array is allocated and another thread is registering a new pernet ops, increments max_gen_ptrs, which is then used to set s.len with a larger than allocated length for the variable array. Fix it by reading max_gen_ptrs only once in net_alloc_generic. If max_gen_ptrs is later incremented, it will be caught in net_assign_generic. Signed-off-by: Thadeu Lima de Souza Cascardo Fixes: 073862ba5d24 ("netns: fix net_alloc_generic()") Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240502132006.3430840-1-cascardo@igalia.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/core/net_namespace.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4c1707d0eb9b0..c33930a171629 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -68,12 +68,15 @@ DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { + unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); + unsigned int generic_size; struct net_generic *ng; - unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + generic_size = offsetof(struct net_generic, ptr[gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) - ng->s.len = max_gen_ptrs; + ng->s.len = gen_ptrs; return ng; } @@ -1217,7 +1220,11 @@ static int register_pernet_operations(struct list_head *list, if (error < 0) return error; *ops->id = error; - max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); + /* This does not require READ_ONCE as writers already hold + * pernet_ops_rwsem. But WRITE_ONCE is needed to protect + * net_alloc_generic. + */ + WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1)); } error = __register_pernet_operations(list, ops); if (error) { -- GitLab From a2740fe937765b21a0718ff920c659b846dde511 Mon Sep 17 00:00:00 2001 From: Lakshmi Yadlapati Date: Tue, 7 May 2024 14:46:03 -0500 Subject: [PATCH 2220/2290] hwmon: (pmbus/ucd9000) Increase delay from 250 to 500us commit 26e8383b116d0dbe74e28f86646563ab46d66d83 upstream. Following the failure observed with a delay of 250us, experiments were conducted with various delays. It was found that a delay of 350us effectively mitigated the issue. To provide a more optimal solution while still allowing a margin for stability, the delay is being adjusted to 500us. Signed-off-by: Lakshmi Yadlapati Link: https://lore.kernel.org/r/20240507194603.1305750-1-lakshmiy@us.ibm.com Fixes: 8d655e6523764 ("hwmon: (ucd90320) Add minimum delay between bus accesses") Reviewed-by: Eddie James Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/ucd9000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 3daaf22378322..d6dfa268f31b8 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -80,11 +80,11 @@ struct ucd9000_debugfs_entry { * It has been observed that the UCD90320 randomly fails register access when * doing another access right on the back of a register write. To mitigate this * make sure that there is a minimum delay between a write access and the - * following access. The 250us is based on experimental data. At a delay of - * 200us the issue seems to go away. Add a bit of extra margin to allow for + * following access. The 500 is based on experimental data. At a delay of + * 350us the issue seems to go away. Add a bit of extra margin to allow for * system to system differences. */ -#define UCD90320_WAIT_DELAY_US 250 +#define UCD90320_WAIT_DELAY_US 500 static inline void ucd90320_wait(const struct ucd9000_data *data) { -- GitLab From 7629ef6dda1564098aadeef38e5fbd11ee8627c4 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 26 Apr 2024 19:29:38 +0800 Subject: [PATCH 2221/2290] mm: use memalloc_nofs_save() in page_cache_ra_order() commit 30153e4466647a17eebfced13eede5cbe4290e69 upstream. See commit f2c817bed58d ("mm: use memalloc_nofs_save in readahead path"), ensure that page_cache_ra_order() do not attempt to reclaim file-backed pages too, or it leads to a deadlock, found issue when test ext4 large folio. INFO: task DataXceiver for:7494 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:DataXceiver for state:D stack:0 pid:7494 ppid:1 flags:0x00000200 Call trace: __switch_to+0x14c/0x240 __schedule+0x82c/0xdd0 schedule+0x58/0xf0 io_schedule+0x24/0xa0 __folio_lock+0x130/0x300 migrate_pages_batch+0x378/0x918 migrate_pages+0x350/0x700 compact_zone+0x63c/0xb38 compact_zone_order+0xc0/0x118 try_to_compact_pages+0xb0/0x280 __alloc_pages_direct_compact+0x98/0x248 __alloc_pages+0x510/0x1110 alloc_pages+0x9c/0x130 folio_alloc+0x20/0x78 filemap_alloc_folio+0x8c/0x1b0 page_cache_ra_order+0x174/0x308 ondemand_readahead+0x1c8/0x2b8 page_cache_async_ra+0x68/0xb8 filemap_readahead.isra.0+0x64/0xa8 filemap_get_pages+0x3fc/0x5b0 filemap_splice_read+0xf4/0x280 ext4_file_splice_read+0x2c/0x48 [ext4] vfs_splice_read.part.0+0xa8/0x118 splice_direct_to_actor+0xbc/0x288 do_splice_direct+0x9c/0x108 do_sendfile+0x328/0x468 __arm64_sys_sendfile64+0x8c/0x148 invoke_syscall+0x4c/0x118 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x4c/0x1f8 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x188/0x190 Link: https://lkml.kernel.org/r/20240426112938.124740-1-wangkefeng.wang@huawei.com Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Signed-off-by: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Zhang Yi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/readahead.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/readahead.c b/mm/readahead.c index e4b772bb70e68..794d8ddc06972 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -504,6 +504,7 @@ void page_cache_ra_order(struct readahead_control *ractl, pgoff_t index = readahead_index(ractl); pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT; pgoff_t mark = index + ra->size - ra->async_size; + unsigned int nofs; int err = 0; gfp_t gfp = readahead_gfp_mask(mapping); @@ -520,6 +521,8 @@ void page_cache_ra_order(struct readahead_control *ractl, new_order--; } + /* See comment in page_cache_ra_unbounded() */ + nofs = memalloc_nofs_save(); filemap_invalidate_lock_shared(mapping); while (index <= limit) { unsigned int order = new_order; @@ -548,6 +551,7 @@ void page_cache_ra_order(struct readahead_control *ractl, read_pages(ractl); filemap_invalidate_unlock_shared(mapping); + memalloc_nofs_restore(nofs); /* * If there were already pages in the page cache, then we may have -- GitLab From 07b933a1b62d0f12b7921d6d69346c9a35e9ea97 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 9 May 2024 15:33:04 +0200 Subject: [PATCH 2222/2290] regulator: core: fix debugfs creation regression commit 2a4b49bb58123bad6ec0e07b02845f74c23d5e04 upstream. regulator_get() may sometimes be called more than once for the same consumer device, something which before commit dbe954d8f163 ("regulator: core: Avoid debugfs: Directory ... already present! error") resulted in errors being logged. A couple of recent commits broke the handling of such cases so that attributes are now erroneously created in the debugfs root directory the second time a regulator is requested and the log is filled with errors like: debugfs: File 'uA_load' in directory '/' already present! debugfs: File 'min_uV' in directory '/' already present! debugfs: File 'max_uV' in directory '/' already present! debugfs: File 'constraint_flags' in directory '/' already present! on any further calls. Fixes: 2715bb11cfff ("regulator: core: Fix more error checking for debugfs_create_dir()") Fixes: 08880713ceec ("regulator: core: Streamline debugfs operations") Cc: stable@vger.kernel.org Cc: Geert Uytterhoeven Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240509133304.8883-1-johan+linaro@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c8702011b7613..ff11f37e28c71 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1916,19 +1916,24 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, } } - if (err != -EEXIST) + if (err != -EEXIST) { regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); - if (IS_ERR(regulator->debugfs)) - rdev_dbg(rdev, "Failed to create debugfs directory\n"); + if (IS_ERR(regulator->debugfs)) { + rdev_dbg(rdev, "Failed to create debugfs directory\n"); + regulator->debugfs = NULL; + } + } - debugfs_create_u32("uA_load", 0444, regulator->debugfs, - ®ulator->uA_load); - debugfs_create_u32("min_uV", 0444, regulator->debugfs, - ®ulator->voltage[PM_SUSPEND_ON].min_uV); - debugfs_create_u32("max_uV", 0444, regulator->debugfs, - ®ulator->voltage[PM_SUSPEND_ON].max_uV); - debugfs_create_file("constraint_flags", 0444, regulator->debugfs, - regulator, &constraint_flags_fops); + if (regulator->debugfs) { + debugfs_create_u32("uA_load", 0444, regulator->debugfs, + ®ulator->uA_load); + debugfs_create_u32("min_uV", 0444, regulator->debugfs, + ®ulator->voltage[PM_SUSPEND_ON].min_uV); + debugfs_create_u32("max_uV", 0444, regulator->debugfs, + ®ulator->voltage[PM_SUSPEND_ON].max_uV); + debugfs_create_file("constraint_flags", 0444, regulator->debugfs, + regulator, &constraint_flags_fops); + } /* * Check now if the regulator is an always on regulator - if -- GitLab From 4e73c01b612bed98557f36032bd12260a62cc34f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 8 May 2024 16:46:51 +0100 Subject: [PATCH 2223/2290] spi: microchip-core-qspi: fix setting spi bus clock rate commit ef13561d2b163ac0ae6befa53bca58a26dc3320b upstream. Before ORing the new clock rate with the control register value read from the hardware, the existing clock rate needs to be masked off as otherwise the existing value will interfere with the new one. CC: stable@vger.kernel.org Fixes: 8596124c4c1b ("spi: microchip-core-qspi: Add support for microchip fpga qspi controllers") Signed-off-by: Conor Dooley Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240508-fox-unpiloted-b97e1535627b@spud Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-microchip-core-qspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index 19a6a46829f6d..620c5d19031e2 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -283,6 +283,7 @@ static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_devi } control = readl_relaxed(qspi->regs + REG_CONTROL); + control &= ~CONTROL_CLKRATE_MASK; control |= baud_rate_val << CONTROL_CLKRATE_SHIFT; writel_relaxed(control, qspi->regs + REG_CONTROL); control = readl_relaxed(qspi->regs + REG_CONTROL); -- GitLab From a86743458b24703a63ab3a47d4bc098f0b76f636 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 May 2024 21:41:50 +0900 Subject: [PATCH 2224/2290] ksmbd: off ipv6only for both ipv4/ipv6 binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc00bc83f26eb8f2d8d9f56b949b62fd774d8432 upstream. ΕΛΕΝΗ reported that ksmbd binds to the IPV6 wildcard (::) by default for ipv4 and ipv6 binding. So IPV4 connections are successful only when the Linux system parameter bindv6only is set to 0 [default value]. If this parameter is set to 1, then the ipv6 wildcard only represents any IPV6 address. Samba creates different sockets for ipv4 and ipv6 by default. This patch off sk_ipv6only to support IPV4/IPV6 connections without creating two sockets. Cc: stable@vger.kernel.org Reported-by: ΕΛΕΝΗ ΤΖΑΒΕΛΛΑ Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/transport_tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 9d4222154dcc0..0012919309f11 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -446,6 +446,10 @@ static int create_socket(struct interface *iface) sin6.sin6_family = PF_INET6; sin6.sin6_addr = in6addr_any; sin6.sin6_port = htons(server_conf.tcp_port); + + lock_sock(ksmbd_socket->sk); + ksmbd_socket->sk->sk_ipv6only = false; + release_sock(ksmbd_socket->sk); } ksmbd_tcp_nodelay(ksmbd_socket); -- GitLab From 3ae4f87ac2c696e784ea4c2657f00a62616248eb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 May 2024 21:44:02 +0900 Subject: [PATCH 2225/2290] ksmbd: avoid to send duplicate lease break notifications commit 97c2ec64667bacc49881d2b2dd9afd4d1c3fbaeb upstream. This patch fixes generic/011 when enable smb2 leases. if ksmbd sends multiple notifications for a file, cifs increments the reference count of the file but it does not decrement the count by the failure of queue_work. So even if the file is closed, cifs does not send a SMB2_CLOSE request. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/oplock.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 1253e9bde34c8..4a128da383b91 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -612,13 +612,23 @@ static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level) if (opinfo->op_state == OPLOCK_CLOSING) return -ENOENT; - else if (!opinfo->is_lease && opinfo->level <= req_op_level) - return 1; + else if (opinfo->level <= req_op_level) { + if (opinfo->is_lease && + opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) + return 1; + } } - if (!opinfo->is_lease && opinfo->level <= req_op_level) { - wake_up_oplock_break(opinfo); - return 1; + if (opinfo->level <= req_op_level) { + if (opinfo->is_lease && + opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) { + wake_up_oplock_break(opinfo); + return 1; + } } return 0; } @@ -886,7 +896,6 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) struct lease *lease = brk_opinfo->o_lease; atomic_inc(&brk_opinfo->breaking_cnt); - err = oplock_break_pending(brk_opinfo, req_op_level); if (err) return err < 0 ? err : 0; -- GitLab From 94eb9f83a45dac66de76e9c2a30ab9d592b4c9bd Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 May 2024 21:58:15 +0900 Subject: [PATCH 2226/2290] ksmbd: do not grant v2 lease if parent lease key and epoch are not set commit 691aae4f36f9825df6781da4399a1e718951085a upstream. This patch fix xfstests generic/070 test with smb2 leases = yes. cifs.ko doesn't set parent lease key and epoch in create context v2 lease. ksmbd suppose that parent lease and epoch are vaild if data length is v2 lease context size and handle directory lease using this values. ksmbd should hanle it as v1 lease not v2 lease if parent lease key and epoch are not set in create context v2 lease. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/oplock.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 4a128da383b91..1b98796499d78 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1208,7 +1208,9 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, /* Only v2 leases handle the directory */ if (S_ISDIR(file_inode(fp->filp)->i_mode)) { - if (!lctx || lctx->version != 2) + if (!lctx || lctx->version != 2 || + (lctx->flags != SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE && + !lctx->epoch)) return 0; } @@ -1470,8 +1472,9 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) buf->lcontext.LeaseFlags = lease->flags; buf->lcontext.Epoch = cpu_to_le16(lease->epoch); buf->lcontext.LeaseState = lease->state; - memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, - SMB2_LEASE_KEY_SIZE); + if (lease->flags == SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) + memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, + SMB2_LEASE_KEY_SIZE); buf->ccontext.DataOffset = cpu_to_le16(offsetof (struct create_lease_v2, lcontext)); buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2)); @@ -1536,8 +1539,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) lreq->flags = lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; - memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, - SMB2_LEASE_KEY_SIZE); + if (lreq->flags == SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) + memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, + SMB2_LEASE_KEY_SIZE); lreq->version = 2; } else { struct create_lease *lc = (struct create_lease *)cc; -- GitLab From 1caceadfb50432dbf6d808796cb6c34ebb6d662c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Apr 2024 19:07:39 +0200 Subject: [PATCH 2227/2290] Bluetooth: qca: add missing firmware sanity checks commit 2e4edfa1e2bd821a317e7d006517dcf2f3fac68d upstream. Add the missing sanity checks when parsing the firmware files before downloading them to avoid accessing and corrupting memory beyond the vmalloced buffer. Fixes: 83e81961ff7e ("Bluetooth: btqca: Introduce generic QCA ROME support") Cc: stable@vger.kernel.org # 4.10 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 19cfc342fc7bb..a56d166f9e04d 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -265,9 +265,10 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) } EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); -static void qca_tlv_check_data(struct hci_dev *hdev, +static int qca_tlv_check_data(struct hci_dev *hdev, struct qca_fw_config *config, - u8 *fw_data, enum qca_btsoc_type soc_type) + u8 *fw_data, size_t fw_size, + enum qca_btsoc_type soc_type) { const u8 *data; u32 type_len; @@ -283,6 +284,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, switch (config->type) { case ELF_TYPE_PATCH: + if (fw_size < 7) + return -EINVAL; + config->dnld_mode = QCA_SKIP_EVT_VSE_CC; config->dnld_type = QCA_SKIP_EVT_VSE_CC; @@ -291,6 +295,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, bt_dev_dbg(hdev, "File version : 0x%x", fw_data[6]); break; case TLV_TYPE_PATCH: + if (fw_size < sizeof(struct tlv_type_hdr) + sizeof(struct tlv_type_patch)) + return -EINVAL; + tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); tlv_patch = (struct tlv_type_patch *)tlv->data; @@ -330,6 +337,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; case TLV_TYPE_NVM: + if (fw_size < sizeof(struct tlv_type_hdr)) + return -EINVAL; + tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); @@ -338,17 +348,26 @@ static void qca_tlv_check_data(struct hci_dev *hdev, BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff); BT_DBG("Length\t\t : %d bytes", length); + if (fw_size < length + (tlv->data - fw_data)) + return -EINVAL; + idx = 0; data = tlv->data; - while (idx < length) { + while (idx < length - sizeof(struct tlv_type_nvm)) { tlv_nvm = (struct tlv_type_nvm *)(data + idx); tag_id = le16_to_cpu(tlv_nvm->tag_id); tag_len = le16_to_cpu(tlv_nvm->tag_len); + if (length < idx + sizeof(struct tlv_type_nvm) + tag_len) + return -EINVAL; + /* Update NVM tags as needed */ switch (tag_id) { case EDL_TAG_ID_HCI: + if (tag_len < 3) + return -EINVAL; + /* HCI transport layer parameters * enabling software inband sleep * onto controller side. @@ -364,6 +383,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; case EDL_TAG_ID_DEEP_SLEEP: + if (tag_len < 1) + return -EINVAL; + /* Sleep enable mask * enabling deep sleep feature on controller. */ @@ -372,14 +394,16 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; } - idx += (sizeof(u16) + sizeof(u16) + 8 + tag_len); + idx += sizeof(struct tlv_type_nvm) + tag_len; } break; default: BT_ERR("Unknown TLV type %d", config->type); - break; + return -EINVAL; } + + return 0; } static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size, @@ -529,7 +553,9 @@ static int qca_download_firmware(struct hci_dev *hdev, memcpy(data, fw->data, size); release_firmware(fw); - qca_tlv_check_data(hdev, config, data, soc_type); + ret = qca_tlv_check_data(hdev, config, data, size, soc_type); + if (ret) + return ret; segment = data; remain = size; -- GitLab From 29a475688aebe47f23a94d6cbfd320311a368f65 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Apr 2024 19:07:40 +0200 Subject: [PATCH 2228/2290] Bluetooth: qca: fix NVM configuration parsing commit a112d3c72a227f2edbb6d8094472cc6e503e52af upstream. The NVM configuration files used by WCN3988 and WCN3990/1/8 have two sets of configuration tags that are enclosed by a type-length header of type four which the current parser fails to account for. Instead the driver happily parses random data as if it were valid tags, something which can lead to the configuration data being corrupted if it ever encounters the words 0x0011 or 0x001b. As is clear from commit b63882549b2b ("Bluetooth: btqca: Fix the NVM baudrate tag offcet for wcn3991") the intention has always been to process the configuration data also for WCN3991 and WCN3998 which encodes the baud rate at a different offset. Fix the parser so that it can handle the WCN3xxx configuration files, which has an enclosing type-length header of type four and two sets of TLV tags enclosed by a type-length header of type two and three, respectively. Note that only the first set, which contains the tags the driver is currently looking for, will be parsed for now. With the parser fixed, the software in-band sleep bit will now be set for WCN3991 and WCN3998 (as it is for later controllers) and the default baud rate 3200000 may be updated by the driver also for WCN3xxx controllers. Notably the deep-sleep feature bit is already set by default in all configuration files in linux-firmware. Fixes: 4219d4686875 ("Bluetooth: btqca: Add wcn3990 firmware download support.") Cc: stable@vger.kernel.org # 4.19 Cc: Matthias Kaehlcke Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index a56d166f9e04d..a09b2fe6fff16 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -278,6 +278,7 @@ static int qca_tlv_check_data(struct hci_dev *hdev, struct tlv_type_patch *tlv_patch; struct tlv_type_nvm *tlv_nvm; uint8_t nvm_baud_rate = config->user_baud_rate; + u8 type; config->dnld_mode = QCA_SKIP_EVT_NONE; config->dnld_type = QCA_SKIP_EVT_NONE; @@ -343,11 +344,30 @@ static int qca_tlv_check_data(struct hci_dev *hdev, tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); - length = (type_len >> 8) & 0x00ffffff; + length = type_len >> 8; + type = type_len & 0xff; - BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff); + /* Some NVM files have more than one set of tags, only parse + * the first set when it has type 2 for now. When there is + * more than one set there is an enclosing header of type 4. + */ + if (type == 4) { + if (fw_size < 2 * sizeof(struct tlv_type_hdr)) + return -EINVAL; + + tlv++; + + type_len = le32_to_cpu(tlv->type_len); + length = type_len >> 8; + type = type_len & 0xff; + } + + BT_DBG("TLV Type\t\t : 0x%x", type); BT_DBG("Length\t\t : %d bytes", length); + if (type != 2) + break; + if (fw_size < length + (tlv->data - fw_data)) return -EINVAL; -- GitLab From bcccdc947d2ca5972b1e92d0dea10803ddc08ceb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 May 2024 14:34:53 +0200 Subject: [PATCH 2229/2290] Bluetooth: qca: fix info leak when fetching board id commit 0adcf6be1445ed50bfd4a451a7a782568f270197 upstream. Add the missing sanity check when fetching the board id to avoid leaking slab data when later requesting the firmware. Fixes: a7f8dedb4be2 ("Bluetooth: qca: add support for QCA2066") Cc: stable@vger.kernel.org # 6.7 Cc: Tim Jiang Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index a09b2fe6fff16..04cb4ce48aa4d 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -235,6 +235,11 @@ static int qca_read_fw_board_id(struct hci_dev *hdev, u16 *bid) goto out; } + if (skb->len < 3) { + err = -EILSEQ; + goto out; + } + *bid = (edl->data[1] << 8) + edl->data[2]; bt_dev_dbg(hdev, "%s: bid = %x", __func__, *bid); -- GitLab From 57062aa13e87b1a78a4a8f6cb5fab6ba24f5f488 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 May 2024 14:34:52 +0200 Subject: [PATCH 2230/2290] Bluetooth: qca: fix info leak when fetching fw build id commit cda0d6a198e2a7ec6f176c36173a57bdd8af7af2 upstream. Add the missing sanity checks and move the 255-byte build-id buffer off the stack to avoid leaking stack data through debugfs in case the build-info reply is malformed. Fixes: c0187b0bd3e9 ("Bluetooth: btqca: Add support to read FW build version for WCN3991 BTSoC") Cc: stable@vger.kernel.org # 5.12 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 25 +++++++++++++++++++++---- drivers/bluetooth/btqca.h | 1 - 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 04cb4ce48aa4d..9239fa60dedd7 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -99,7 +99,8 @@ static int qca_read_fw_build_info(struct hci_dev *hdev) { struct sk_buff *skb; struct edl_event_hdr *edl; - char cmd, build_label[QCA_FW_BUILD_VER_LEN]; + char *build_label; + char cmd; int build_lbl_len, err = 0; bt_dev_dbg(hdev, "QCA read fw build info"); @@ -114,6 +115,11 @@ static int qca_read_fw_build_info(struct hci_dev *hdev) return err; } + if (skb->len < sizeof(*edl)) { + err = -EILSEQ; + goto out; + } + edl = (struct edl_event_hdr *)(skb->data); if (!edl) { bt_dev_err(hdev, "QCA read fw build info with no header"); @@ -129,14 +135,25 @@ static int qca_read_fw_build_info(struct hci_dev *hdev) goto out; } + if (skb->len < sizeof(*edl) + 1) { + err = -EILSEQ; + goto out; + } + build_lbl_len = edl->data[0]; - if (build_lbl_len <= QCA_FW_BUILD_VER_LEN - 1) { - memcpy(build_label, edl->data + 1, build_lbl_len); - *(build_label + build_lbl_len) = '\0'; + + if (skb->len < sizeof(*edl) + 1 + build_lbl_len) { + err = -EILSEQ; + goto out; } + build_label = kstrndup(&edl->data[1], build_lbl_len, GFP_KERNEL); + if (!build_label) + goto out; + hci_set_fw_info(hdev, "%s", build_label); + kfree(build_label); out: kfree_skb(skb); return err; diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index dc31984f71dc1..38e2fbc950248 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -47,7 +47,6 @@ #define get_soc_ver(soc_id, rom_ver) \ ((le32_to_cpu(soc_id) << 16) | (le16_to_cpu(rom_ver))) -#define QCA_FW_BUILD_VER_LEN 255 #define QCA_HSP_GF_SOC_ID 0x1200 #define QCA_HSP_GF_SOC_MASK 0x0000ff00 -- GitLab From 064688d70c33bb5b49dde6e972b9379a8b045d8a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 May 2024 08:37:40 +0200 Subject: [PATCH 2231/2290] Bluetooth: qca: fix firmware check error path commit 40d442f969fb1e871da6fca73d3f8aef1f888558 upstream. A recent commit fixed the code that parses the firmware files before downloading them to the controller but introduced a memory leak in case the sanity checks ever fail. Make sure to free the firmware buffer before returning on errors. Fixes: f905ae0be4b7 ("Bluetooth: qca: add missing firmware sanity checks") Cc: stable@vger.kernel.org # 4.19 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 9239fa60dedd7..a0fadde993d70 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -597,7 +597,7 @@ static int qca_download_firmware(struct hci_dev *hdev, ret = qca_tlv_check_data(hdev, config, data, size, soc_type); if (ret) - return ret; + goto out; segment = data; remain = size; -- GitLab From 9ff3c42aa37c2e6989e9b73d466737ef13d74969 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 21 Mar 2024 19:44:07 +0000 Subject: [PATCH 2232/2290] VFIO: Add the SPR_DSA and SPR_IAX devices to the denylist commit 95feb3160eef0caa6018e175a5560b816aee8e79 upstream. Due to an erratum with the SPR_DSA and SPR_IAX devices, it is not secure to assign these devices to virtual machines. Add the PCI IDs of these devices to the VFIO denylist to ensure that this is handled appropriately by the VFIO subsystem. The SPR_DSA and SPR_IAX devices are on-SOC devices for the Sapphire Rapids (and related) family of products that perform data movement and compression. Signed-off-by: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/registers.h | 3 --- drivers/vfio/pci/vfio_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index fe3b8d04f9db1..fdfe7930f1834 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -4,9 +4,6 @@ #define _IDXD_REGISTERS_H_ /* PCI Config */ -#define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 -#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe - #define DEVICE_VERSION_1 0x100 #define DEVICE_VERSION_2 0x200 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1d4919edfbde4..a3ed9ab477486 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -71,6 +71,8 @@ static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: + case PCI_DEVICE_ID_INTEL_DSA_SPR0: + case PCI_DEVICE_ID_INTEL_IAX_SPR0: return true; default: return false; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9e9794d03c9fc..2c1371320c295 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2664,7 +2664,9 @@ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 +#define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 +#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 #define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 #define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f -- GitLab From 3e4368832e5478bec54c0b83e7c3fb390518bb96 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 24 Apr 2024 14:43:22 +0000 Subject: [PATCH 2233/2290] dmaengine: idxd: add a new security check to deal with a hardware erratum commit e11452eb071b2a8e6ba52892b2e270bbdaa6640d upstream. On Sapphire Rapids and related platforms, the DSA and IAA devices have an erratum that causes direct access (for example, by using the ENQCMD or MOVDIR64 instructions) from untrusted applications to be a security problem. To solve this, add a flag to the PCI device enumeration and device structures to indicate the presence/absence of this security exposure. In the mmap() method of the device, this flag is then used to enforce that the user has the CAP_SYS_RAWIO capability. In a future patch, a write() based method will be added that allows untrusted applications submit work to the accelerator, where the kernel can do sanity checking on the user input to ensure secure operation of the accelerator. Signed-off-by: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/cdev.c | 12 ++++++++++++ drivers/dma/idxd/idxd.h | 3 +++ drivers/dma/idxd/init.c | 4 ++++ 3 files changed, 19 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index a9b96b18772f3..2138c993f2077 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -198,6 +198,18 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) int rc; dev_dbg(&pdev->dev, "%s called\n", __func__); + + /* + * Due to an erratum in some of the devices supported by the driver, + * direct user submission to the device can be unsafe. + * (See the INTEL-SA-01084 security advisory) + * + * For the devices that exhibit this behavior, require that the user + * has CAP_SYS_RAWIO capabilities. + */ + if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO)) + return -EPERM; + rc = check_vma(wq, vma, __func__); if (rc < 0) return rc; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ced8d283d98b..14c6ef987fede 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -258,6 +258,7 @@ struct idxd_driver_data { struct device_type *dev_type; int compl_size; int align; + bool user_submission_safe; }; struct idxd_device { @@ -316,6 +317,8 @@ struct idxd_device { struct idxd_pmu *idxd_pmu; unsigned long *opcap_bmap; + + bool user_submission_safe; }; /* IDXD software descriptor */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index e0f49545d89ff..30193195c8133 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -48,6 +48,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct dsa_completion_record), .align = 32, .dev_type = &dsa_device_type, + .user_submission_safe = false, /* See INTEL-SA-01084 security advisory */ }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -55,6 +56,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct iax_completion_record), .align = 64, .dev_type = &iax_device_type, + .user_submission_safe = false, /* See INTEL-SA-01084 security advisory */ }, }; @@ -663,6 +665,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); + idxd->user_submission_safe = data->user_submission_safe; + return 0; err_dev_register: -- GitLab From 5056d238937dc7ef9fdf7c06a5bf64f9b04091c0 Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Wed, 24 Apr 2024 15:16:12 +0000 Subject: [PATCH 2234/2290] dmaengine: idxd: add a write() method for applications to submit work commit 6827738dc684a87ad54ebba3ae7f3d7c977698eb upstream. After the patch to restrict the use of mmap() to CAP_SYS_RAWIO for the currently existing devices, most applications can no longer make use of the accelerators as in production "you don't run things as root". To keep the DSA and IAA accelerators usable, hook up a write() method so that applications can still submit work. In the write method, sufficient input validation is performed to avoid the security issue that required the mmap CAP_SYS_RAWIO check. One complication is that the DSA device allows for indirect ("batched") descriptors. There is no reasonable way to do the input validation on these indirect descriptors so the write() method will not allow these to be submitted to the hardware on affected hardware, and the sysfs enumeration of support for the opcode is also removed. Early performance data shows that the performance delta for most common cases is within the noise. Signed-off-by: Nikhil Rao Signed-off-by: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/cdev.c | 65 ++++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/sysfs.c | 27 +++++++++++++++-- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 2138c993f2077..9f8adb7013eba 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -224,6 +224,70 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_page_prot); } +static int idxd_submit_user_descriptor(struct idxd_user_context *ctx, + struct dsa_hw_desc __user *udesc) +{ + struct idxd_wq *wq = ctx->wq; + struct idxd_dev *idxd_dev = &wq->idxd->idxd_dev; + const uint64_t comp_addr_align = is_dsa_dev(idxd_dev) ? 0x20 : 0x40; + void __iomem *portal = idxd_wq_portal_addr(wq); + struct dsa_hw_desc descriptor __aligned(64); + int rc; + + rc = copy_from_user(&descriptor, udesc, sizeof(descriptor)); + if (rc) + return -EFAULT; + + /* + * DSA devices are capable of indirect ("batch") command submission. + * On devices where direct user submissions are not safe, we cannot + * allow this since there is no good way for us to verify these + * indirect commands. + */ + if (is_dsa_dev(idxd_dev) && descriptor.opcode == DSA_OPCODE_BATCH && + !wq->idxd->user_submission_safe) + return -EINVAL; + /* + * As per the programming specification, the completion address must be + * aligned to 32 or 64 bytes. If this is violated the hardware + * engine can get very confused (security issue). + */ + if (!IS_ALIGNED(descriptor.completion_addr, comp_addr_align)) + return -EINVAL; + + if (wq_dedicated(wq)) + iosubmit_cmds512(portal, &descriptor, 1); + else { + descriptor.priv = 0; + descriptor.pasid = ctx->pasid; + rc = idxd_enqcmds(wq, portal, &descriptor); + if (rc < 0) + return rc; + } + + return 0; +} + +static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t len, + loff_t *unused) +{ + struct dsa_hw_desc __user *udesc = (struct dsa_hw_desc __user *)buf; + struct idxd_user_context *ctx = filp->private_data; + ssize_t written = 0; + int i; + + for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) { + int rc = idxd_submit_user_descriptor(ctx, udesc + i); + + if (rc) + return written ? written : rc; + + written += sizeof(struct dsa_hw_desc); + } + + return written; +} + static __poll_t idxd_cdev_poll(struct file *filp, struct poll_table_struct *wait) { @@ -246,6 +310,7 @@ static const struct file_operations idxd_cdev_fops = { .open = idxd_cdev_open, .release = idxd_cdev_release, .mmap = idxd_cdev_mmap, + .write = idxd_cdev_write, .poll = idxd_cdev_poll, }; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6e1e14b376e65..c811757d0f97f 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1162,12 +1162,35 @@ static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attrib static struct device_attribute dev_attr_wq_enqcmds_retries = __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); +static ssize_t op_cap_show_common(struct device *dev, char *buf, unsigned long *opcap_bmap) +{ + ssize_t pos; + int i; + + pos = 0; + for (i = IDXD_MAX_OPCAP_BITS/64 - 1; i >= 0; i--) { + unsigned long val = opcap_bmap[i]; + + /* On systems where direct user submissions are not safe, we need to clear out + * the BATCH capability from the capability mask in sysfs since we cannot support + * that command on such systems. + */ + if (i == DSA_OPCODE_BATCH/64 && !confdev_to_idxd(dev)->user_submission_safe) + clear_bit(DSA_OPCODE_BATCH % 64, &val); + + pos += sysfs_emit_at(buf, pos, "%*pb", 64, &val); + pos += sysfs_emit_at(buf, pos, "%c", i == 0 ? '\n' : ','); + } + + return pos; +} + static ssize_t wq_op_config_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_wq *wq = confdev_to_wq(dev); - return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap); + return op_cap_show_common(dev, buf, wq->opcap_bmap); } static int idxd_verify_supported_opcap(struct idxd_device *idxd, unsigned long *opmask) @@ -1381,7 +1404,7 @@ static ssize_t op_cap_show(struct device *dev, { struct idxd_device *idxd = confdev_to_idxd(dev); - return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, idxd->opcap_bmap); + return op_cap_show_common(dev, buf, idxd->opcap_bmap); } static DEVICE_ATTR_RO(op_cap); -- GitLab From e4519a016650e952ad9eb27937f8c447d5a4e06d Mon Sep 17 00:00:00 2001 From: Silvio Gissi Date: Fri, 15 Mar 2024 15:05:39 -0400 Subject: [PATCH 2235/2290] keys: Fix overwrite of key expiration on instantiation commit 9da27fb65a14c18efd4473e2e82b76b53ba60252 upstream. The expiry time of a key is unconditionally overwritten during instantiation, defaulting to turn it permanent. This causes a problem for DNS resolution as the expiration set by user-space is overwritten to TIME64_MAX, disabling further DNS updates. Fix this by restoring the condition that key_set_expiry is only called when the pre-parser sets a specific expiry. Fixes: 39299bdd2546 ("keys, dns: Allow key types (eg. DNS) to be reclaimed immediately on expiry") Signed-off-by: Silvio Gissi cc: David Howells cc: Hazem Mohamed Abuelfotoh cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: keyrings@vger.kernel.org cc: netdev@vger.kernel.org cc: stable@vger.kernel.org Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/keys/key.c b/security/keys/key.c index e65240641ca57..f2a84d86eab43 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -464,7 +464,8 @@ static int __key_instantiate_and_link(struct key *key, if (authkey) key_invalidate(authkey); - key_set_expiry(key, prep->expiry); + if (prep->expiry != TIME64_MAX) + key_set_expiry(key, prep->expiry); } } -- GitLab From 0391c9085a5774e8b000cf8b249c834a661a8107 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 26 Mar 2024 09:16:46 +1030 Subject: [PATCH 2236/2290] btrfs: do not wait for short bulk allocation commit 1db7959aacd905e6487d0478ac01d89f86eb1e51 upstream. [BUG] There is a recent report that when memory pressure is high (including cached pages), btrfs can spend most of its time on memory allocation in btrfs_alloc_page_array() for compressed read/write. [CAUSE] For btrfs_alloc_page_array() we always go alloc_pages_bulk_array(), and even if the bulk allocation failed (fell back to single page allocation) we still retry but with extra memalloc_retry_wait(). If the bulk alloc only returned one page a time, we would spend a lot of time on the retry wait. The behavior was introduced in commit 395cb57e8560 ("btrfs: wait between incomplete batch memory allocations"). [FIX] Although the commit mentioned that other filesystems do the wait, it's not the case at least nowadays. All the mainlined filesystems only call memalloc_retry_wait() if they failed to allocate any page (not only for bulk allocation). If there is any progress, they won't call memalloc_retry_wait() at all. For example, xfs_buf_alloc_pages() would only call memalloc_retry_wait() if there is no allocation progress at all, and the call is not for metadata readahead. So I don't believe we should call memalloc_retry_wait() unconditionally for short allocation. Call memalloc_retry_wait() if it fails to allocate any page for tree block allocation (which goes with __GFP_NOFAIL and may not need the special handling anyway), and reduce the latency for btrfs_alloc_page_array(). Reported-by: Julian Taylor Tested-by: Julian Taylor Link: https://lore.kernel.org/all/8966c095-cbe7-4d22-9784-a647d1bf27c3@1und1.de/ Fixes: 395cb57e8560 ("btrfs: wait between incomplete batch memory allocations") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Sweet Tea Dorminy Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 539bc9bdcb93f..5f923c9b773e0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1324,19 +1324,14 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) unsigned int last = allocated; allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array); - - if (allocated == nr_pages) - return 0; - - /* - * During this iteration, no page could be allocated, even - * though alloc_pages_bulk_array() falls back to alloc_page() - * if it could not bulk-allocate. So we must be out of memory. - */ - if (allocated == last) + if (unlikely(allocated == last)) { + /* No progress, fail and do cleanup. */ + for (int i = 0; i < allocated; i++) { + __free_page(page_array[i]); + page_array[i] = NULL; + } return -ENOMEM; - - memalloc_retry_wait(GFP_NOFS); + } } return 0; } -- GitLab From 2effe407f7563add41750fd7e03da4ea44b98099 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 19 Apr 2024 16:58:19 +0800 Subject: [PATCH 2237/2290] mm/hugetlb: fix DEBUG_LOCKS_WARN_ON(1) when dissolve_free_hugetlb_folio() commit 52ccdde16b6540abe43b6f8d8e1e1ec90b0983af upstream. When I did memory failure tests recently, below warning occurs: DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 8 PID: 1011 at kernel/locking/lockdep.c:232 __lock_acquire+0xccb/0x1ca0 Modules linked in: mce_inject hwpoison_inject CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 FS: 00007ff9f32aa740(0000) GS:ffffa1ce5fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff9f3134ba0 CR3: 00000008484e4000 CR4: 00000000000006f0 Call Trace: lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 Kernel panic - not syncing: kernel: panic_on_warn set ... CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: panic+0x326/0x350 check_panic_on_warn+0x4f/0x50 __warn+0x98/0x190 report_bug+0x18e/0x1a0 handle_bug+0x3d/0x70 exc_invalid_op+0x18/0x70 asm_exc_invalid_op+0x1a/0x20 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 After git bisecting and digging into the code, I believe the root cause is that _deferred_list field of folio is unioned with _hugetlb_subpool field. In __update_and_free_hugetlb_folio(), folio->_deferred_list is initialized leading to corrupted folio->_hugetlb_subpool when folio is hugetlb. Later free_huge_folio() will use _hugetlb_subpool and above warning happens. But it is assumed hugetlb flag must have been cleared when calling folio_put() in update_and_free_hugetlb_folio(). This assumption is broken due to below race: CPU1 CPU2 dissolve_free_huge_page update_and_free_pages_bulk update_and_free_hugetlb_folio hugetlb_vmemmap_restore_folios folio_clear_hugetlb_vmemmap_optimized clear_flag = folio_test_hugetlb_vmemmap_optimized if (clear_flag) <-- False, it's already cleared. __folio_clear_hugetlb(folio) <-- Hugetlb is not cleared. folio_put free_huge_folio <-- free_the_page is expected. list_for_each_entry() __folio_clear_hugetlb <-- Too late. Fix this issue by checking whether folio is hugetlb directly instead of checking clear_flag to close the race window. Link: https://lkml.kernel.org/r/20240419085819.1901645-1-linmiaohe@huawei.com Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") Signed-off-by: Miaohe Lin Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Miaohe Lin Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e9ae0fc81dfbe..4361dcf70139f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1762,7 +1762,6 @@ static void __update_and_free_page(struct hstate *h, struct page *page) { int i; struct page *subpage; - bool clear_dtor = HPageVmemmapOptimized(page); if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) return; @@ -1797,7 +1796,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * If vmemmap pages were allocated above, then we need to clear the * hugetlb destructor under the hugetlb lock. */ - if (clear_dtor) { + if (PageHuge(page)) { spin_lock_irq(&hugetlb_lock); __clear_hugetlb_destructor(h, page); spin_unlock_irq(&hugetlb_lock); -- GitLab From ea92809e291c251ce9c9154a1c676f3fa4dbbb35 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Sun, 7 Apr 2024 15:05:37 +0200 Subject: [PATCH 2238/2290] mm,swapops: update check in is_pfn_swap_entry for hwpoison entries commit 07a57a338adb6ec9e766d6a6790f76527f45ceb5 upstream. Tony reported that the Machine check recovery was broken in v6.9-rc1, as he was hitting a VM_BUG_ON when injecting uncorrectable memory errors to DRAM. After some more digging and debugging on his side, he realized that this went back to v6.1, with the introduction of 'commit 0d206b5d2e0d ("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry")'. That commit, among other things, introduced swp_offset_pfn(), replacing hwpoison_entry_to_pfn() in its favour. The patch also introduced a VM_BUG_ON() check for is_pfn_swap_entry(), but is_pfn_swap_entry() never got updated to cover hwpoison entries, which means that we would hit the VM_BUG_ON whenever we would call swp_offset_pfn() for such entries on environments with CONFIG_DEBUG_VM set. Fix this by updating the check to cover hwpoison entries as well, and update the comment while we are it. Link: https://lkml.kernel.org/r/20240407130537.16977-1-osalvador@suse.de Fixes: 0d206b5d2e0d ("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry") Signed-off-by: Oscar Salvador Reported-by: Tony Luck Closes: https://lore.kernel.org/all/Zg8kLSl2yAlA3o5D@agluck-desk3/ Tested-by: Tony Luck Reviewed-by: Peter Xu Reviewed-by: David Hildenbrand Acked-by: Miaohe Lin Cc: [6.1.x] Signed-off-by: Andrew Morton Signed-off-by: Miaohe Lin Signed-off-by: Greg Kroah-Hartman --- include/linux/swapops.h | 105 ++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index b07b277d6a166..1f59f9edcc241 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -409,6 +409,55 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) } #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_MEMORY_FAILURE + +extern atomic_long_t num_poisoned_pages __read_mostly; + +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} + +static inline void num_poisoned_pages_inc(void) +{ + atomic_long_inc(&num_poisoned_pages); +} + +static inline void num_poisoned_pages_sub(long i) +{ + atomic_long_sub(i, &num_poisoned_pages); +} + +#else /* CONFIG_MEMORY_FAILURE */ + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} + +static inline void num_poisoned_pages_inc(void) +{ +} + +static inline void num_poisoned_pages_sub(long i) +{ +} +#endif /* CONFIG_MEMORY_FAILURE */ + typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) @@ -503,8 +552,9 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) /* * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They are used to represent unaddressable device memory - * and to restrict access to a page undergoing migration. + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { @@ -512,7 +562,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry); + is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; @@ -581,55 +631,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#ifdef CONFIG_MEMORY_FAILURE - -extern atomic_long_t num_poisoned_pages __read_mostly; - -/* - * Support for hardware poisoned pages - */ -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - BUG_ON(!PageLocked(page)); - return swp_entry(SWP_HWPOISON, page_to_pfn(page)); -} - -static inline int is_hwpoison_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_HWPOISON; -} - -static inline void num_poisoned_pages_inc(void) -{ - atomic_long_inc(&num_poisoned_pages); -} - -static inline void num_poisoned_pages_sub(long i) -{ - atomic_long_sub(i, &num_poisoned_pages); -} - -#else /* CONFIG_MEMORY_FAILURE */ - -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - return swp_entry(0, 0); -} - -static inline int is_hwpoison_entry(swp_entry_t swp) -{ - return 0; -} - -static inline void num_poisoned_pages_inc(void) -{ -} - -static inline void num_poisoned_pages_sub(long i) -{ -} -#endif /* CONFIG_MEMORY_FAILURE */ - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; -- GitLab From beaf11969fd5cbe6f09cefaa34df1ce8578e8dd9 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Thu, 8 Feb 2024 16:55:56 +0800 Subject: [PATCH 2239/2290] md: fix kmemleak of rdev->serial commit 6cf350658736681b9d6b0b6e58c5c76b235bb4c4 upstream. If kobject_add() is fail in bind_rdev_to_array(), 'rdev->serial' will be alloc not be freed, and kmemleak occurs. unreferenced object 0xffff88815a350000 (size 49152): comm "mdadm", pid 789, jiffies 4294716910 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc f773277a): [<0000000058b0a453>] kmemleak_alloc+0x61/0xe0 [<00000000366adf14>] __kmalloc_large_node+0x15e/0x270 [<000000002e82961b>] __kmalloc_node.cold+0x11/0x7f [<00000000f206d60a>] kvmalloc_node+0x74/0x150 [<0000000034bf3363>] rdev_init_serial+0x67/0x170 [<0000000010e08fe9>] mddev_create_serial_pool+0x62/0x220 [<00000000c3837bf0>] bind_rdev_to_array+0x2af/0x630 [<0000000073c28560>] md_add_new_disk+0x400/0x9f0 [<00000000770e30ff>] md_ioctl+0x15bf/0x1c10 [<000000006cfab718>] blkdev_ioctl+0x191/0x3f0 [<0000000085086a11>] vfs_ioctl+0x22/0x60 [<0000000018b656fe>] __x64_sys_ioctl+0xba/0xe0 [<00000000e54e675e>] do_syscall_64+0x71/0x150 [<000000008b0ad622>] entry_SYSCALL_64_after_hwframe+0x6c/0x74 Fixes: 963c555e75b0 ("md: introduce mddev_create/destroy_wb_pool for the change of member device") Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240208085556.2412922-1-linan666@huaweicloud.com [ mddev_destroy_serial_pool third parameter was removed in mainline, where there is no need to suspend within this function anymore. ] Signed-off-by: Jeremy Bongio Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 788acc81e7a84..506c998c0ca59 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2508,6 +2508,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) fail: pr_warn("md: failed to register dev-%s for %s\n", b, mdname(mddev)); + mddev_destroy_serial_pool(mddev, rdev, false); return err; } -- GitLab From ed804e9d8b9a8bf2d982228779e23185b1572613 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 May 2024 10:02:24 -0700 Subject: [PATCH 2240/2290] net: bcmgenet: Clear RGMII_LINK upon link down commit 696450c05181559a35d4d5bee55c465b1ac6fe2e upstream Clear the RGMII_LINK bit upon detecting link down to be consistent with setting the bit upon link up. We also move the clearing of the out-of-band disable to the runtime initialization rather than for each link up/down transition. Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20221118213754.1383364-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 1779ee524dac7..cc3afb605b1ec 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -72,7 +72,6 @@ static void bcmgenet_mac_config(struct net_device *dev) * Receive clock is provided by the PHY. */ reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg &= ~OOB_DISABLE; reg |= RGMII_LINK; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); @@ -100,10 +99,18 @@ static void bcmgenet_mac_config(struct net_device *dev) */ void bcmgenet_mii_setup(struct net_device *dev) { + struct bcmgenet_priv *priv = netdev_priv(dev); struct phy_device *phydev = dev->phydev; + u32 reg; - if (phydev->link) + if (phydev->link) { bcmgenet_mac_config(dev); + } else { + reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~RGMII_LINK; + bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + } + phy_print_status(phydev); } @@ -264,18 +271,20 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) (priv->phy_interface != PHY_INTERFACE_MODE_MOCA); /* This is an external PHY (xMII), so we need to enable the RGMII - * block for the interface to work + * block for the interface to work, unconditionally clear the + * Out-of-band disable since we do not need it. */ + reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~OOB_DISABLE; if (priv->ext_phy) { - reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); reg &= ~ID_MODE_DIS; reg |= id_mode_dis; if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) reg |= RGMII_MODE_EN_V123; else reg |= RGMII_MODE_EN; - bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } + bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); if (init) dev_info(kdev, "configuring instance for %s\n", phy_name); -- GitLab From 714e053565d4efc753a16496cd682a0a45cb8297 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 15 May 2024 10:02:25 -0700 Subject: [PATCH 2241/2290] net: bcmgenet: synchronize EXT_RGMII_OOB_CTRL access commit d85cf67a339685beae1d0aee27b7f61da95455be upstream The EXT_RGMII_OOB_CTRL register can be written from different contexts. It is predominantly written from the adjust_link handler which is synchronized by the phydev->lock, but can also be written from a different context when configuring the mii in bcmgenet_mii_config(). The chances of contention are quite low, but it is conceivable that adjust_link could occur during resume when WoL is enabled so use the phydev->lock synchronizer in bcmgenet_mii_config() to be sure. Fixes: afe3f907d20f ("net: bcmgenet: power on MII block for all MII modes") Cc: stable@vger.kernel.org Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index cc3afb605b1ec..07bb7a4e517cd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -2,7 +2,7 @@ /* * Broadcom GENET MDIO routines * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #include @@ -274,6 +274,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) * block for the interface to work, unconditionally clear the * Out-of-band disable since we do not need it. */ + mutex_lock(&phydev->lock); reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); reg &= ~OOB_DISABLE; if (priv->ext_phy) { @@ -285,6 +286,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) reg |= RGMII_MODE_EN; } bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + mutex_unlock(&phydev->lock); if (init) dev_info(kdev, "configuring instance for %s\n", phy_name); -- GitLab From 9ed299be99989db32950ca414ce67bf618c7f726 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 15 May 2024 10:02:26 -0700 Subject: [PATCH 2242/2290] net: bcmgenet: synchronize use of bcmgenet_set_rx_mode() commit 2dbe5f19368caae63b1f59f5bc2af78c7d522b3a upstream The ndo_set_rx_mode function is synchronized with the netif_addr_lock spinlock and BHs disabled. Since this function is also invoked directly from the driver the same synchronization should be applied. Fixes: 72f96347628e ("net: bcmgenet: set Rx mode before starting netif") Cc: stable@vger.kernel.org Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 4b8574acf848e..541c17cd42d47 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -3352,7 +3352,9 @@ static void bcmgenet_netif_start(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); /* Start the network engine */ + netif_addr_lock_bh(dev); bcmgenet_set_rx_mode(dev); + netif_addr_unlock_bh(dev); bcmgenet_enable_rx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); -- GitLab From 8064a711c4865b69cd47e88f3166ae46c9bcb887 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 15 May 2024 10:02:27 -0700 Subject: [PATCH 2243/2290] net: bcmgenet: synchronize UMAC_CMD access commit 0d5e2a82232605b337972fb2c7d0cbc46898aca1 upstream The UMAC_CMD register is written from different execution contexts and has insufficient synchronization protections to prevent possible corruption. Of particular concern are the acceses from the phy_device delayed work context used by the adjust_link call and the BH context that may be used by the ndo_set_rx_mode call. A spinlock is added to the driver to protect contended register accesses (i.e. reg_lock) and it is used to synchronize accesses to UMAC_CMD. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Cc: stable@vger.kernel.org Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 12 +++++++++++- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 4 +++- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 8 +++++++- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 ++ 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 541c17cd42d47..f087a97164094 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2468,14 +2468,18 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) { u32 reg; + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (reg & CMD_SW_RESET) + if (reg & CMD_SW_RESET) { + spin_unlock_bh(&priv->reg_lock); return; + } if (enable) reg |= mask; else reg &= ~mask; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); /* UniMAC stops on a packet boundary, wait for a full-size packet * to be processed @@ -2491,8 +2495,10 @@ static void reset_umac(struct bcmgenet_priv *priv) udelay(10); /* issue soft reset and disable MAC while updating its registers */ + spin_lock_bh(&priv->reg_lock); bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); udelay(2); + spin_unlock_bh(&priv->reg_lock); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) @@ -3615,16 +3621,19 @@ static void bcmgenet_set_rx_mode(struct net_device *dev) * 3. The number of filters needed exceeds the number filters * supported by the hardware. */ + spin_lock(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) || (nfilter > MAX_MDF_FILTER)) { reg |= CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL); return; } else { reg &= ~CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); } /* update MDF filter */ @@ -4026,6 +4035,7 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->reg_lock); spin_lock_init(&priv->lock); /* Set default pause parameters */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1985c0ec4da2a..28e2c94ef835c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #ifndef __BCMGENET_H__ @@ -573,6 +573,8 @@ struct bcmgenet_rxnfc_rule { /* device context */ struct bcmgenet_priv { void __iomem *base; + /* reg_lock: lock to serialize access to shared registers */ + spinlock_t reg_lock; enum bcmgenet_version version; struct net_device *dev; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index f55d9d9c01a85..56781e7214978 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet_wol: " fmt @@ -133,6 +133,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Can't suspend with WoL if MAC is still in reset */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if (reg & CMD_SW_RESET) reg &= ~CMD_SW_RESET; @@ -140,6 +141,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* disable RX */ reg &= ~CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); mdelay(10); if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { @@ -185,6 +187,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Enable CRC forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = 1; reg |= CMD_CRC_FWD; @@ -192,6 +195,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* Receiver must be enabled for WOL MP detection */ reg |= CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); reg = UMAC_IRQ_MPD_R; if (hfb_enable) @@ -238,7 +242,9 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, } /* Disable CRC Forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~CMD_CRC_FWD; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 07bb7a4e517cd..f21f2aaa6fd91 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -75,6 +75,7 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= RGMII_LINK; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) | CMD_HD_EN | @@ -87,6 +88,7 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= CMD_TX_EN | CMD_RX_EN; } bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); priv->eee.eee_active = phy_init_eee(phydev, 0) >= 0; bcmgenet_eee_enable_set(dev, -- GitLab From 4078fa637fcd80c8487680ec2e4ef7c58308e9aa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 May 2024 11:56:25 +0200 Subject: [PATCH 2244/2290] Linux 6.1.91 Link: https://lore.kernel.org/r/20240514101020.320785513@linuxfoundation.org Tested-by: Miguel Ojeda Tested-by: Pavel Machek (CIP) Tested-by: Allen Pais Tested-by: Yann Sionneau Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20240515082456.986812732@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: Conor Dooley Tested-by: Ron Economos Tested-by: Yann Sionneau Link: https://lore.kernel.org/r/20240516091232.619851361@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Mark Brown Tested-by: Jon Hunter Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: kernelci.org bot Tested-by: Allen Pais Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7ae5cf9ec9e55..a7d90996e4125 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 90 +SUBLEVEL = 91 EXTRAVERSION = NAME = Curry Ramen -- GitLab From 91402e0e5de9124a3108db7a14163fcf9a6d322f Mon Sep 17 00:00:00 2001 From: Jose Fernandez Date: Mon, 22 Apr 2024 08:35:44 -0600 Subject: [PATCH 2245/2290] drm/amd/display: Fix division by zero in setup_dsc_config commit 130afc8a886183a94cf6eab7d24f300014ff87ba upstream. When slice_height is 0, the division by slice_height in the calculation of the number of slices will cause a division by zero driver crash. This leaves the kernel in a state that requires a reboot. This patch adds a check to avoid the division by zero. The stack trace below is for the 6.8.4 Kernel. I reproduced the issue on a Z16 Gen 2 Lenovo Thinkpad with a Apple Studio Display monitor connected via Thunderbolt. The amdgpu driver crashed with this exception when I rebooted the system with the monitor connected. kernel: ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447) kernel: ? do_trap (arch/x86/kernel/traps.c:113 arch/x86/kernel/traps.c:154) kernel: ? setup_dsc_config (drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dc_dsc.c:1053) amdgpu kernel: ? do_error_trap (./arch/x86/include/asm/traps.h:58 arch/x86/kernel/traps.c:175) kernel: ? setup_dsc_config (drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dc_dsc.c:1053) amdgpu kernel: ? exc_divide_error (arch/x86/kernel/traps.c:194 (discriminator 2)) kernel: ? setup_dsc_config (drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dc_dsc.c:1053) amdgpu kernel: ? asm_exc_divide_error (./arch/x86/include/asm/idtentry.h:548) kernel: ? setup_dsc_config (drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dc_dsc.c:1053) amdgpu kernel: dc_dsc_compute_config (drivers/gpu/drm/amd/amdgpu/../display/dc/dsc/dc_dsc.c:1109) amdgpu After applying this patch, the driver no longer crashes when the monitor is connected and the system is rebooted. I believe this is the same issue reported for 3113. Reviewed-by: Rodrigo Siqueira Signed-off-by: Jose Fernandez Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3113 Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index d52cbc0e9b679..5f57bdd597c27 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -924,7 +924,12 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - dsc_cfg->num_slices_v = pic_height/slice_height; + if (slice_height > 0) { + dsc_cfg->num_slices_v = pic_height / slice_height; + } else { + is_dsc_possible = false; + goto done; + } if (target_bandwidth_kbps > 0) { is_dsc_possible = decide_dsc_target_bpp_x16( -- GitLab From 8a94fc9d2072c32e244cdac84bfb86df44c866c7 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Mon, 13 May 2024 16:39:22 +0200 Subject: [PATCH 2246/2290] net: ks8851: Fix another TX stall caused by wrong ISR flag handling commit 317a215d493230da361028ea8a4675de334bfa1a upstream. Under some circumstances it may happen that the ks8851 Ethernet driver stops sending data. Currently the interrupt handler resets the interrupt status flags in the hardware after handling TX. With this approach we may lose interrupts in the time window between handling the TX interrupt and resetting the TX interrupt status bit. When all of the three following conditions are true then transmitting data stops: - TX queue is stopped to wait for room in the hardware TX buffer - no queued SKBs in the driver (txq) that wait for being written to hw - hardware TX buffer is empty and the last TX interrupt was lost This is because reenabling the TX queue happens when handling the TX interrupt status but if the TX status bit has already been cleared then this interrupt will never come. With this commit the interrupt status flags will be cleared before they are handled. That way we stop losing interrupts. The wrong handling of the ISR flags was there from the beginning but with commit 3dc5d4454545 ("net: ks8851: Fix TX stall caused by TX buffer overrun") the issue becomes apparent. Fixes: 3dc5d4454545 ("net: ks8851: Fix TX stall caused by TX buffer overrun") Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Ronald Wahl Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/micrel/ks8851_common.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 502518cdb4618..6453c92f0fa7c 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -328,7 +328,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) { struct ks8851_net *ks = _ks; struct sk_buff_head rxq; - unsigned handled = 0; unsigned long flags; unsigned int status; struct sk_buff *skb; @@ -336,24 +335,17 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) ks8851_lock(ks, &flags); status = ks8851_rdreg16(ks, KS_ISR); + ks8851_wrreg16(ks, KS_ISR, status); netif_dbg(ks, intr, ks->netdev, "%s: status 0x%04x\n", __func__, status); - if (status & IRQ_LCI) - handled |= IRQ_LCI; - if (status & IRQ_LDI) { u16 pmecr = ks8851_rdreg16(ks, KS_PMECR); pmecr &= ~PMECR_WKEVT_MASK; ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK); - - handled |= IRQ_LDI; } - if (status & IRQ_RXPSI) - handled |= IRQ_RXPSI; - if (status & IRQ_TXI) { unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR); @@ -365,20 +357,12 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (netif_queue_stopped(ks->netdev)) netif_wake_queue(ks->netdev); spin_unlock(&ks->statelock); - - handled |= IRQ_TXI; } - if (status & IRQ_RXI) - handled |= IRQ_RXI; - if (status & IRQ_SPIBEI) { netdev_err(ks->netdev, "%s: spi bus error\n", __func__); - handled |= IRQ_SPIBEI; } - ks8851_wrreg16(ks, KS_ISR, handled); - if (status & IRQ_RXI) { /* the datasheet says to disable the rx interrupt during * packet read-out, however we're masking the interrupt -- GitLab From 59161a21cae02db5f037571892d4faa40da140f6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 16 Feb 2024 14:06:35 -0800 Subject: [PATCH 2247/2290] ice: pass VSI pointer into ice_vc_isvalid_q_id commit a21605993dd5dfd15edfa7f06705ede17b519026 upstream. The ice_vc_isvalid_q_id() function takes a VSI index and a queue ID. It looks up the VSI from its index, and then validates that the queue number is valid for that VSI. The VSI ID passed is typically a VSI index from the VF. This VSI number is validated by the PF to ensure that it matches the VSI associated with the VF already. In every flow where ice_vc_isvalid_q_id() is called, the PF driver already has a pointer to the VSI associated with the VF. This pointer is obtained using ice_get_vf_vsi(), rather than looking up the VSI using the index sent by the VF. Since we already know which VSI to operate on, we can modify ice_vc_isvalid_q_id() to take a VSI pointer instead of a VSI index. Pass the VSI we found from ice_get_vf_vsi() instead of re-doing the lookup. This removes some unnecessary computation and scanning of the VSI list. It also removes the last place where the driver directly used the VSI number from the VF. This will pave the way for refactoring to communicate relative VSI numbers to the VF instead of absolute numbers from the PF space. Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index e64bef490a174..42d8e5e771b7e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -544,17 +544,15 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) /** * ice_vc_isvalid_q_id - * @vf: pointer to the VF info - * @vsi_id: VSI ID + * @vsi: VSI to check queue ID against * @qid: VSI relative queue ID * * check for the valid queue ID */ -static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid) +static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid) { - struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id); /* allocated Tx and Rx queues should be always equal for VF VSI */ - return (vsi && (qid < vsi->alloc_txq)); + return qid < vsi->alloc_txq; } /** @@ -1254,7 +1252,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) */ q_map = vqs->rx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1276,7 +1274,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1381,7 +1379,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1407,7 +1405,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); } else if (q_map) { for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1463,7 +1461,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { vsi_q_id = vsi_q_id_idx; - if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) return VIRTCHNL_STATUS_ERR_PARAM; q_vector->num_ring_rx++; @@ -1477,7 +1475,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { vsi_q_id = vsi_q_id_idx; - if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) return VIRTCHNL_STATUS_ERR_PARAM; q_vector->num_ring_tx++; @@ -1611,7 +1609,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) qpi->txq.headwb_enabled || !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || - !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { + !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { goto error_param; } -- GitLab From 90cbd4c081bba04e73bdcc7bee4cd858d5dce506 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 16 Feb 2024 14:06:36 -0800 Subject: [PATCH 2248/2290] ice: remove unnecessary duplicate checks for VF VSI ID commit 363f689600dd010703ce6391bcfc729a97d21840 upstream. The ice_vc_fdir_param_check() function validates that the VSI ID of the virtchnl flow director command matches the VSI number of the VF. This is already checked by the call to ice_vc_isvalid_vsi_id() immediately following this. This check is unnecessary since ice_vc_isvalid_vsi_id() already confirms this by checking that the VSI ID can locate the VSI associated with the VF structure. Furthermore, a following change is going to refactor the ice driver to report VSI IDs using a relative index for each VF instead of reporting the PF VSI number. This additional check would break that logic since it enforces that the VSI ID matches the VSI number. Since this check duplicates the logic in ice_vc_isvalid_vsi_id() and gets in the way of refactoring that logic, remove it. Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 7f72604079723..fb8e856933097 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -107,9 +107,6 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id) if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)) return -EINVAL; - if (vsi_id != vf->lan_vsi_num) - return -EINVAL; - if (!ice_vc_isvalid_vsi_id(vf, vsi_id)) return -EINVAL; -- GitLab From 026caf92c69fb7c35c307eea2172e82ff46d667e Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Wed, 19 Jul 2023 23:22:52 +0300 Subject: [PATCH 2249/2290] pinctrl: core: handle radix_tree_insert() errors in pinctrl_register_one_pin() commit ecfe9a015d3e1e46504d5b3de7eef1f2d186194a upstream. pinctrl_register_one_pin() doesn't check the result of radix_tree_insert() despite they both may return a negative error code. Linus Walleij said he has copied the radix tree code from kernel/irq/ where the functions calling radix_tree_insert() are *void* themselves; I think it makes more sense to propagate the errors from radix_tree_insert() upstream if we can do that... Found by Linux Verification Center (linuxtesting.org) with the Svace static analysis tool. Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230719202253.13469-3-s.shtylyov@omp.ru Signed-off-by: Linus Walleij Cc: "Hemdan, Hagar Gamal Halim" Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 1ef36a0a7dd20..223482584f54f 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -205,6 +205,7 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, const struct pinctrl_pin_desc *pin) { struct pin_desc *pindesc; + int error; pindesc = pin_desc_get(pctldev, pin->number); if (pindesc) { @@ -226,18 +227,25 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, } else { pindesc->name = kasprintf(GFP_KERNEL, "PIN%u", pin->number); if (!pindesc->name) { - kfree(pindesc); - return -ENOMEM; + error = -ENOMEM; + goto failed; } pindesc->dynamic_name = true; } pindesc->drv_data = pin->drv_data; - radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); + error = radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); + if (error) + goto failed; + pr_debug("registered pin %d (%s) on %s\n", pin->number, pindesc->name, pctldev->desc->name); return 0; + +failed: + kfree(pindesc); + return error; } static int pinctrl_register_pins(struct pinctrl_dev *pctldev, -- GitLab From cf8e6ae8575db86d7e36a12a9936a1076f509ed0 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Sat, 12 Nov 2022 15:18:32 +0000 Subject: [PATCH 2250/2290] mfd: stpmic1: Fix swapped mask/unmask in irq chip commit c79e387389d5add7cb967d2f7622c3bf5550927b upstream. The usual behavior of mask registers is writing a '1' bit to disable (mask) an interrupt; similarly, writing a '1' bit to an unmask register enables (unmasks) an interrupt. Due to a longstanding issue in regmap-irq, mask and unmask registers were inverted when both kinds of registers were present on the same chip, ie. regmap-irq actually wrote '1's to the mask register to enable an IRQ and '1's to the unmask register to disable an IRQ. This was fixed by commit e8ffb12e7f06 ("regmap-irq: Fix inverted handling of unmask registers") but the fix is opt-in via mask_unmask_non_inverted = true because it requires manual changes for each affected driver. The new behavior will become the default once all drivers have been updated. The STPMIC1 has a normal mask register with separate set and clear registers. The driver intends to use the set & clear registers with regmap-irq and has compensated for regmap-irq's inverted behavior, and should currently be working properly. Thus, swap mask_base and unmask_base, and opt in to the new non-inverted behavior. Signed-off-by: Aidan MacDonald Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221112151835.39059-16-aidanmacdonald.0x0@gmail.com Cc: Yoann Congal Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/stpmic1.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c index eb3da558c3fbd..ee0469d5d4354 100644 --- a/drivers/mfd/stpmic1.c +++ b/drivers/mfd/stpmic1.c @@ -108,8 +108,9 @@ static const struct regmap_irq stpmic1_irqs[] = { static const struct regmap_irq_chip stpmic1_regmap_irq_chip = { .name = "pmic_irq", .status_base = INT_PENDING_R1, - .mask_base = INT_CLEAR_MASK_R1, - .unmask_base = INT_SET_MASK_R1, + .mask_base = INT_SET_MASK_R1, + .unmask_base = INT_CLEAR_MASK_R1, + .mask_unmask_non_inverted = true, .ack_base = INT_CLEAR_R1, .num_regs = STPMIC1_PMIC_NUM_IRQ_REGS, .irqs = stpmic1_irqs, -- GitLab From 104ef3d8cda2e135832f4c31af6fdb40f9f6193b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 18 Jul 2023 16:38:08 +1000 Subject: [PATCH 2251/2290] nfsd: don't allow nfsd threads to be signalled. commit 3903902401451b1cd9d797a8c79769eb26ac7fe5 upstream. The original implementation of nfsd used signals to stop threads during shutdown. In Linux 2.3.46pre5 nfsd gained the ability to shutdown threads internally it if was asked to run "0" threads. After this user-space transitioned to using "rpc.nfsd 0" to stop nfsd and sending signals to threads was no longer an important part of the API. In commit 3ebdbe5203a8 ("SUNRPC: discard svo_setup and rename svc_set_num_threads_sync()") (v5.17-rc1~75^2~41) we finally removed the use of signals for stopping threads, using kthread_stop() instead. This patch makes the "obvious" next step and removes the ability to signal nfsd threads - or any svc threads. nfsd stops allowing signals and we don't check for their delivery any more. This will allow for some simplification in later patches. A change worth noting is in nfsd4_ssc_setup_dul(). There was previously a signal_pending() check which would only succeed when the thread was being shut down. It should really have tested kthread_should_stop() as well. Now it just does the latter, not the former. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback.c | 9 +-------- fs/nfsd/nfs4proc.c | 5 ++--- fs/nfsd/nfssvc.c | 12 ------------ net/sunrpc/svc_xprt.c | 16 ++++++---------- 4 files changed, 9 insertions(+), 33 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 456af7d230cf1..46a0a2d6962e1 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); /* * Listen for a request on the socket */ @@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); - - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ba53cd89ec62c..b6d768bd5ccca 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1313,12 +1313,11 @@ try_again: /* found a match */ if (ni->nsui_busy) { /* wait - and try again */ - prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, - TASK_INTERRUPTIBLE); + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE); spin_unlock(&nn->nfsd_ssc_lock); /* allow 20secs for mount/unmount for now - revisit */ - if (signal_pending(current) || + if (kthread_should_stop() || (schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0c75636054a54..a8190caf77f17 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -952,15 +952,6 @@ nfsd(void *vrqstp) current->fs->umask = 0; - /* - * thread is spawned with all signals set to SIG_IGN, re-enable - * the ones that will bring down the thread - */ - allow_signal(SIGKILL); - allow_signal(SIGHUP); - allow_signal(SIGINT); - allow_signal(SIGQUIT); - atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -985,9 +976,6 @@ nfsd(void *vrqstp) validate_process_creds(); } - /* Clear signals before calling svc_exit_thread() */ - flush_signals(current); - atomic_dec(&nfsdstats.th_cnt); out: diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8117d0e08d5a2..1393eefbf2187 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -696,8 +696,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) /* Made progress, don't sleep yet */ continue; - set_current_state(TASK_INTERRUPTIBLE); - if (signalled() || kthread_should_stop()) { + set_current_state(TASK_IDLE); + if (kthread_should_stop()) { set_current_state(TASK_RUNNING); return -EINTR; } @@ -733,7 +733,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return false; /* are we freezing? */ @@ -755,11 +755,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... - */ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); @@ -781,7 +777,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); out_found: @@ -879,7 +875,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); err = -EINTR; - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) goto out; xprt = svc_get_next_xprt(rqstp, timeout); -- GitLab From 5d91238b590bd883c86ba7707c5c9096469c08b7 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 20 May 2024 02:31:53 +0300 Subject: [PATCH 2252/2290] KEYS: trusted: Fix memory leak in tpm2_key_encode() commit ffcaa2172cc1a85ddb8b783de96d38ca8855e248 upstream. 'scratch' is never freed. Fix this by calling kfree() in the success, and in the error case. Cc: stable@vger.kernel.org # +v5.13 Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs") Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/trusted-keys/trusted_tpm2.c | 24 +++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index bc700f85f80be..a5feee8c11af9 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -38,6 +38,7 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, u8 *end_work = scratch + SCRATCH_SIZE; u8 *priv, *pub; u16 priv_len, pub_len; + int ret; priv_len = get_unaligned_be16(src) + 2; priv = src; @@ -57,8 +58,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, unsigned char bool[3], *w = bool; /* tag 0 is emptyAuth */ w = asn1_encode_boolean(w, w + sizeof(bool), true); - if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) - return PTR_ERR(w); + if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) { + ret = PTR_ERR(w); + goto err; + } work = asn1_encode_tag(work, end_work, 0, bool, w - bool); } @@ -69,8 +72,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, * trigger, so if it does there's something nefarious going on */ if (WARN(work - scratch + pub_len + priv_len + 14 > SCRATCH_SIZE, - "BUG: scratch buffer is too small")) - return -EINVAL; + "BUG: scratch buffer is too small")) { + ret = -EINVAL; + goto err; + } work = asn1_encode_integer(work, end_work, options->keyhandle); work = asn1_encode_octet_string(work, end_work, pub, pub_len); @@ -79,10 +84,17 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, work1 = payload->blob; work1 = asn1_encode_sequence(work1, work1 + sizeof(payload->blob), scratch, work - scratch); - if (WARN(IS_ERR(work1), "BUG: ASN.1 encoder failed")) - return PTR_ERR(work1); + if (WARN(IS_ERR(work1), "BUG: ASN.1 encoder failed")) { + ret = PTR_ERR(work1); + goto err; + } + kfree(scratch); return work1 - payload->blob; + +err: + kfree(scratch); + return ret; } struct tpm2_key_context { -- GitLab From 493a8172e548ad2fe3f79808ebea93fbe05ac5da Mon Sep 17 00:00:00 2001 From: Mengqi Zhang Date: Mon, 25 Dec 2023 17:38:40 +0800 Subject: [PATCH 2253/2290] mmc: core: Add HS400 tuning in HS400es initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 77e01b49e35f24ebd1659096d5fc5c3b75975545 upstream. During the initialization to HS400es stage, add a HS400 tuning flow as an optional process. For Mediatek IP, the HS400es mode requires a specific tuning to ensure the correct HS400 timing setting. Signed-off-by: Mengqi Zhang Link: https://lore.kernel.org/r/20231225093839.22931-2-mengqi.zhang@mediatek.com Signed-off-by: Ulf Hansson Cc: "Lin Gui (桂林)" Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3a927452a6501..7e39017e440fb 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1819,8 +1819,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (err) goto free_card; - - } else if (!mmc_card_hs400es(card)) { + } else if (mmc_card_hs400es(card)) { + if (host->ops->execute_hs400_tuning) { + err = host->ops->execute_hs400_tuning(host, card); + if (err) + goto free_card; + } + } else { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); if (err > 0 && mmc_card_hs(card)) { -- GitLab From 495e934c66a9091538ba1fcb16ade130137d8e8b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:49 -0700 Subject: [PATCH 2254/2290] xfs: write page faults in iomap are not buffered writes [ Upstream commit 118e021b4b66f758f8e8f21dc0e5e0a4c721e69e ] When we reserve a delalloc region in xfs_buffered_write_iomap_begin, we mark the iomap as IOMAP_F_NEW so that the the write context understands that it allocated the delalloc region. If we then fail that buffered write, xfs_buffered_write_iomap_end() checks for the IOMAP_F_NEW flag and if it is set, it punches out the unused delalloc region that was allocated for the write. The assumption this code makes is that all buffered write operations that can allocate space are run under an exclusive lock (i_rwsem). This is an invalid assumption: page faults in mmap()d regions call through this same function pair to map the file range being faulted and this runs only holding the inode->i_mapping->invalidate_lock in shared mode. IOWs, we can have races between page faults and write() calls that fail the nested page cache write operation that result in data loss. That is, the failing iomap_end call will punch out the data that the other racing iomap iteration brought into the page cache. This can be reproduced with generic/34[46] if we arbitrarily fail page cache copy-in operations from write() syscalls. Code analysis tells us that the iomap_page_mkwrite() function holds the already instantiated and uptodate folio locked across the iomap mapping iterations. Hence the folio cannot be removed from memory whilst we are mapping the range it covers, and as such we do not care if the mapping changes state underneath the iomap iteration loop: 1. if the folio is not already dirty, there is no writeback races possible. 2. if we allocated the mapping (delalloc or unwritten), the folio cannot already be dirty. See #1. 3. If the folio is already dirty, it must be up to date. As we hold it locked, it cannot be reclaimed from memory. Hence we always have valid data in the page cache while iterating the mapping. 4. Valid data in the page cache can exist when the underlying mapping is DELALLOC, UNWRITTEN or WRITTEN. Having the mapping change from DELALLOC->UNWRITTEN or UNWRITTEN->WRITTEN does not change the data in the page - it only affects actions if we are initialising a new page. Hence #3 applies and we don't care about these extent map transitions racing with iomap_page_mkwrite(). 5. iomap_page_mkwrite() checks for page invalidation races (truncate, hole punch, etc) after it locks the folio. We also hold the mapping->invalidation_lock here, and hence the mapping cannot change due to extent removal operations while we are iterating the folio. As such, filesystems that don't use bufferheads will never fail the iomap_folio_mkwrite_iter() operation on the current mapping, regardless of whether the iomap should be considered stale. Further, the range we are asked to iterate is limited to the range inside EOF that the folio spans. Hence, for XFS, we will only map the exact range we are asked for, and we will only do speculative preallocation with delalloc if we are mapping a hole at the EOF page. The iterator will consume the entire range of the folio that is within EOF, and anything beyond the EOF block cannot be accessed. We never need to truncate this post-EOF speculative prealloc away in the context of the iomap_page_mkwrite() iterator because if it remains unused we'll remove it when the last reference to the inode goes away. Hence we don't actually need an .iomap_end() cleanup/error handling path at all for iomap_page_mkwrite() for XFS. This means we can separate the page fault processing from the complexity of the .iomap_end() processing in the buffered write path. This also means that the buffered write path will also be able to take the mapping->invalidate_lock as necessary. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_iomap.c | 9 +++++++++ fs/xfs/xfs_iomap.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e462d39c840e6..595a5bcf46b94 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1325,7 +1325,7 @@ __xfs_filemap_fault( if (write_fault) { xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = iomap_page_mkwrite(vmf, - &xfs_buffered_write_iomap_ops); + &xfs_page_mkwrite_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); } else { ret = filemap_fault(vmf); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 07da03976ec12..5cea069a38b4e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1187,6 +1187,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = { .iomap_end = xfs_buffered_write_iomap_end, }; +/* + * iomap_page_mkwrite() will never fail in a way that requires delalloc extents + * that it allocated to be revoked. Hence we do not need an .iomap_end method + * for this operation. + */ +const struct iomap_ops xfs_page_mkwrite_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, +}; + static int xfs_read_iomap_begin( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index c782e8c0479c0..0f62ab633040c 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -47,6 +47,7 @@ xfs_aligned_fsb_count( } extern const struct iomap_ops xfs_buffered_write_iomap_ops; +extern const struct iomap_ops xfs_page_mkwrite_iomap_ops; extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; -- GitLab From 142eafd24de513b7f4ec4274cf6dc8e53ca962cb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:50 -0700 Subject: [PATCH 2255/2290] xfs: punching delalloc extents on write failure is racy [ Upstream commit 198dd8aedee6a7d2de0dfa739f9a008a938f6848 ] xfs_buffered_write_iomap_end() has a comment about the safety of punching delalloc extents based holding the IOLOCK_EXCL. This comment is wrong, and punching delalloc extents is not race free. When we punch out a delalloc extent after a write failure in xfs_buffered_write_iomap_end(), we punch out the page cache with truncate_pagecache_range() before we punch out the delalloc extents. At this point, we only hold the IOLOCK_EXCL, so there is nothing stopping mmap() write faults racing with this cleanup operation, reinstantiating a folio over the range we are about to punch and hence requiring the delalloc extent to be kept. If this race condition is hit, we can end up with a dirty page in the page cache that has no delalloc extent or space reservation backing it. This leads to bad things happening at writeback time. To avoid this race condition, we need the page cache truncation to be atomic w.r.t. the extent manipulation. We can do this by holding the mapping->invalidate_lock exclusively across this operation - this will prevent new pages from being inserted into the page cache whilst we are removing the pages and the backing extent and space reservation. Taking the mapping->invalidate_lock exclusively in the buffered write IO path is safe - it naturally nests inside the IOLOCK (see truncate and fallocate paths). iomap_zero_range() can be called from under the mapping->invalidate_lock (from the truncate path via either xfs_zero_eof() or xfs_truncate_page(), but iomap_zero_iter() will not instantiate new delalloc pages (because it skips holes) and hence will not ever need to punch out delalloc extents on failure. Fix the locking issue, and clean up the code logic a little to avoid unnecessary work if we didn't allocate the delalloc extent or wrote the entire region we allocated. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5cea069a38b4e..a2e45ea1b0cb3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1147,6 +1147,10 @@ xfs_buffered_write_iomap_end( written = 0; } + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + /* * start_fsb refers to the first unused block after a short write. If * nothing was written, round offset down to point at the first block in @@ -1158,27 +1162,28 @@ xfs_buffered_write_iomap_end( start_fsb = XFS_B_TO_FSB(mp, offset + written); end_fsb = XFS_B_TO_FSB(mp, offset + length); + /* Nothing to do if we've written the entire delalloc extent */ + if (start_fsb >= end_fsb) + return 0; + /* - * Trim delalloc blocks if they were allocated by this write and we - * didn't manage to write the whole range. - * - * We don't need to care about racing delalloc as we hold i_mutex - * across the reserve/allocate/unreserve calls. If there are delalloc - * blocks in the range, they are ours. + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite between the page cache + * truncation and the delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. */ - if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); - - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); - return error; - } + filemap_invalidate_lock(inode->i_mapping); + truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), + XFS_FSB_TO_B(mp, end_fsb) - 1); + + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, + end_fsb - start_fsb); + filemap_invalidate_unlock(inode->i_mapping); + if (error && !xfs_is_shutdown(mp)) { + xfs_alert(mp, "%s: unable to clean up ino %lld", + __func__, ip->i_ino); + return error; } - return 0; } -- GitLab From 8b6afad39bd168808e42f98959775a59c42fad76 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:51 -0700 Subject: [PATCH 2256/2290] xfs: use byte ranges for write cleanup ranges [ Upstream commit b71f889c18ada210a97aa3eb5e00c0de552234c6 ] xfs_buffered_write_iomap_end() currently converts the byte ranges passed to it to filesystem blocks to pass them to the bmap code to punch out delalloc blocks, but then has to convert filesytem blocks back to byte ranges for page cache truncate. We're about to make the page cache truncate go away and replace it with a page cache walk, so having to convert everything to/from/to filesystem blocks is messy and error-prone. It is much easier to pass around byte ranges and convert to page indexes and/or filesystem blocks only where those units are needed. In preparation for the page cache walk being added, add a helper that converts byte ranges to filesystem blocks and calls xfs_bmap_punch_delalloc_range() and convert xfs_buffered_write_iomap_end() to calculate limits in byte ranges. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a2e45ea1b0cb3..7bb55dbc19d35 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1120,6 +1120,20 @@ out_unlock: return error; } +static int +xfs_buffered_write_delalloc_punch( + struct inode *inode, + loff_t start_byte, + loff_t end_byte) +{ + struct xfs_mount *mp = XFS_M(inode->i_sb); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); + + return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, + end_fsb - start_fsb); +} + static int xfs_buffered_write_iomap_end( struct inode *inode, @@ -1129,10 +1143,9 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; + struct xfs_mount *mp = XFS_M(inode->i_sb); + loff_t start_byte; + loff_t end_byte; int error = 0; if (iomap->type != IOMAP_DELALLOC) @@ -1157,13 +1170,13 @@ xfs_buffered_write_iomap_end( * the range. */ if (unlikely(!written)) - start_fsb = XFS_B_TO_FSBT(mp, offset); + start_byte = round_down(offset, mp->m_sb.sb_blocksize); else - start_fsb = XFS_B_TO_FSB(mp, offset + written); - end_fsb = XFS_B_TO_FSB(mp, offset + length); + start_byte = round_up(offset + written, mp->m_sb.sb_blocksize); + end_byte = round_up(offset + length, mp->m_sb.sb_blocksize); /* Nothing to do if we've written the entire delalloc extent */ - if (start_fsb >= end_fsb) + if (start_byte >= end_byte) return 0; /* @@ -1173,15 +1186,12 @@ xfs_buffered_write_iomap_end( * leave dirty pages with no space reservation in the cache. */ filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); - - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); + truncate_pagecache_range(inode, start_byte, end_byte - 1); + error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte); filemap_invalidate_unlock(inode->i_mapping); if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); + xfs_alert(mp, "%s: unable to clean up ino 0x%llx", + __func__, XFS_I(inode)->i_ino); return error; } return 0; -- GitLab From 12339ec6fe4d41e69a81a13ca5e1c443fbe5bcba Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:52 -0700 Subject: [PATCH 2257/2290] xfs,iomap: move delalloc punching to iomap [ Upstream commit 9c7babf94a0d686b552e53aded8d4703d1b8b92b ] Because that's what Christoph wants for this error handling path only XFS uses. It requires a new iomap export for handling errors over delalloc ranges. This is basically the XFS code as is stands, but even though Christoph wants this as iomap funcitonality, we still have to call it from the filesystem specific ->iomap_end callback, and call into the iomap code with yet another filesystem specific callback to punch the delalloc extent within the defined ranges. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/iomap/buffered-io.c | 60 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_iomap.c | 47 ++++++--------------------------- include/linux/iomap.h | 4 +++ 3 files changed, 72 insertions(+), 39 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a0a4d8de82cad..24be3297822b2 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -827,6 +827,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +/* + * When a short write occurs, the filesystem may need to remove reserved space + * that was allocated in ->iomap_begin from it's ->iomap_end method. For + * filesystems that use delayed allocation, we need to punch out delalloc + * extents from the range that are not dirty in the page cache. As the write can + * race with page faults, there can be dirty pages over the delalloc extent + * outside the range of a short write but still within the delalloc extent + * allocated for this iomap. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations, but converts them back to {offset,len} tuples for + * the punch callback. + */ +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, + ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t start_byte; + loff_t end_byte; + int blocksize = i_blocksize(inode); + int error = 0; + + if (iomap->type != IOMAP_DELALLOC) + return 0; + + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + + /* + * start_byte refers to the first unused block after a short write. If + * nothing was written, round offset down to point at the first block in + * the range. + */ + if (unlikely(!written)) + start_byte = round_down(pos, blocksize); + else + start_byte = round_up(pos + written, blocksize); + end_byte = round_up(pos + length, blocksize); + + /* Nothing to do if we've written the entire delalloc extent */ + if (start_byte >= end_byte) + return 0; + + /* + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite between the page cache + * truncation and the delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. + */ + filemap_invalidate_lock(inode->i_mapping); + truncate_pagecache_range(inode, start_byte, end_byte - 1); + error = punch(inode, start_byte, end_byte - start_byte); + filemap_invalidate_unlock(inode->i_mapping); + + return error; +} +EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); + static loff_t iomap_unshare_iter(struct iomap_iter *iter) { struct iomap *iomap = &iter->iomap; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7bb55dbc19d35..ea96e8a348687 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1123,12 +1123,12 @@ out_unlock: static int xfs_buffered_write_delalloc_punch( struct inode *inode, - loff_t start_byte, - loff_t end_byte) + loff_t offset, + loff_t length) { struct xfs_mount *mp = XFS_M(inode->i_sb); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, end_fsb - start_fsb); @@ -1143,13 +1143,9 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_mount *mp = XFS_M(inode->i_sb); - loff_t start_byte; - loff_t end_byte; - int error = 0; - if (iomap->type != IOMAP_DELALLOC) - return 0; + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error; /* * Behave as if the write failed if drop writes is enabled. Set the NEW @@ -1160,35 +1156,8 @@ xfs_buffered_write_iomap_end( written = 0; } - /* If we didn't reserve the blocks, we're not allowed to punch them. */ - if (!(iomap->flags & IOMAP_F_NEW)) - return 0; - - /* - * start_fsb refers to the first unused block after a short write. If - * nothing was written, round offset down to point at the first block in - * the range. - */ - if (unlikely(!written)) - start_byte = round_down(offset, mp->m_sb.sb_blocksize); - else - start_byte = round_up(offset + written, mp->m_sb.sb_blocksize); - end_byte = round_up(offset + length, mp->m_sb.sb_blocksize); - - /* Nothing to do if we've written the entire delalloc extent */ - if (start_byte >= end_byte) - return 0; - - /* - * Lock the mapping to avoid races with page faults re-instantiating - * folios and dirtying them via ->page_mkwrite between the page cache - * truncation and the delalloc extent removal. Failing to do this can - * leave dirty pages with no space reservation in the cache. - */ - filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(inode, start_byte, end_byte - 1); - error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte); - filemap_invalidate_unlock(inode->i_mapping); + error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, + length, written, &xfs_buffered_write_delalloc_punch); if (error && !xfs_is_shutdown(mp)) { xfs_alert(mp, "%s: unable to clean up ino 0x%llx", __func__, XFS_I(inode)->i_ino); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 238a03087e17e..0698c4b8ce0e2 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -226,6 +226,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)); + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); -- GitLab From 38be53c3fd7f4f4bd5de319a323d72f9f6beb16d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:53 -0700 Subject: [PATCH 2258/2290] iomap: buffered write failure should not truncate the page cache [ Upstream commit f43dc4dc3eff028b5ddddd99f3a66c5a6bdd4e78 ] iomap_file_buffered_write_punch_delalloc() currently invalidates the page cache over the unused range of the delalloc extent that was allocated. While the write allocated the delalloc extent, it does not own it exclusively as the write does not hold any locks that prevent either writeback or mmap page faults from changing the state of either the page cache or the extent state backing this range. Whilst xfs_bmap_punch_delalloc_range() already handles races in extent conversion - it will only punch out delalloc extents and it ignores any other type of extent - the page cache truncate does not discriminate between data written by this write or some other task. As a result, truncating the page cache can result in data corruption if the write races with mmap modifications to the file over the same range. generic/346 exercises this workload, and if we randomly fail writes (as will happen when iomap gets stale iomap detection later in the patchset), it will randomly corrupt the file data because it removes data written by mmap() in the same page as the write() that failed. Hence we do not want to punch out the page cache over the range of the extent we failed to write to - what we actually need to do is detect the ranges that have dirty data in cache over them and *not punch them out*. To do this, we have to walk the page cache over the range of the delalloc extent we want to remove. This is made complex by the fact we have to handle partially up-to-date folios correctly and this can happen even when the FSB size == PAGE_SIZE because we now support multi-page folios in the page cache. Because we are only interested in discovering the edges of data ranges in the page cache (i.e. hole-data boundaries) we can make use of mapping_seek_hole_data() to find those transitions in the page cache. As we hold the invalidate_lock, we know that the boundaries are not going to change while we walk the range. This interface is also byte-based and is sub-page block aware, so we can find the data ranges in the cache based on byte offsets rather than page, folio or fs block sized chunks. This greatly simplifies the logic of finding dirty cached ranges in the page cache. Once we've identified a range that contains cached data, we can then iterate the range folio by folio. This allows us to determine if the data is dirty and hence perform the correct delalloc extent punching operations. The seek interface we use to iterate data ranges will give us sub-folio start/end granularity, so we may end up looking up the same folio multiple times as the seek interface iterates across each discontiguous data region in the folio. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/iomap/buffered-io.c | 195 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 180 insertions(+), 15 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 24be3297822b2..60bd16f1a23fb 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -827,6 +827,165 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +/* + * Scan the data range passed to us for dirty page cache folios. If we find a + * dirty folio, punch out the preceeding range and update the offset from which + * the next punch will start from. + * + * We can punch out storage reservations under clean pages because they either + * contain data that has been written back - in which case the delalloc punch + * over that range is a no-op - or they have been read faults in which case they + * contain zeroes and we can remove the delalloc backing range and any new + * writes to those pages will do the normal hole filling operation... + * + * This makes the logic simple: we only need to keep the delalloc extents only + * over the dirty ranges of the page cache. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations. + */ +static int iomap_write_delalloc_scan(struct inode *inode, + loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, + int (*punch)(struct inode *inode, loff_t offset, loff_t length)) +{ + while (start_byte < end_byte) { + struct folio *folio; + + /* grab locked page */ + folio = filemap_lock_folio(inode->i_mapping, + start_byte >> PAGE_SHIFT); + if (!folio) { + start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + + PAGE_SIZE; + continue; + } + + /* if dirty, punch up to offset */ + if (folio_test_dirty(folio)) { + if (start_byte > *punch_start_byte) { + int error; + + error = punch(inode, *punch_start_byte, + start_byte - *punch_start_byte); + if (error) { + folio_unlock(folio); + folio_put(folio); + return error; + } + } + + /* + * Make sure the next punch start is correctly bound to + * the end of this data range, not the end of the folio. + */ + *punch_start_byte = min_t(loff_t, end_byte, + folio_next_index(folio) << PAGE_SHIFT); + } + + /* move offset to start of next folio in range */ + start_byte = folio_next_index(folio) << PAGE_SHIFT; + folio_unlock(folio); + folio_put(folio); + } + return 0; +} + +/* + * Punch out all the delalloc blocks in the range given except for those that + * have dirty data still pending in the page cache - those are going to be + * written and so must still retain the delalloc backing for writeback. + * + * As we are scanning the page cache for data, we don't need to reimplement the + * wheel - mapping_seek_hole_data() does exactly what we need to identify the + * start and end of data ranges correctly even for sub-folio block sizes. This + * byte range based iteration is especially convenient because it means we + * don't have to care about variable size folios, nor where the start or end of + * the data range lies within a folio, if they lie within the same folio or even + * if there are multiple discontiguous data ranges within the folio. + * + * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so + * can return data ranges that exist in the cache beyond EOF. e.g. a page fault + * spanning EOF will initialise the post-EOF data to zeroes and mark it up to + * date. A write page fault can then mark it dirty. If we then fail a write() + * beyond EOF into that up to date cached range, we allocate a delalloc block + * beyond EOF and then have to punch it out. Because the range is up to date, + * mapping_seek_hole_data() will return it, and we will skip the punch because + * the folio is dirty. THis is incorrect - we always need to punch out delalloc + * beyond EOF in this case as writeback will never write back and covert that + * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, + * resulting in always punching out the range from the EOF to the end of the + * range the iomap spans. + * + * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it + * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA + * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) + * returns the end of the data range (data_end). Using closed intervals would + * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose + * the code to subtle off-by-one bugs.... + */ +static int iomap_write_delalloc_release(struct inode *inode, + loff_t start_byte, loff_t end_byte, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t punch_start_byte = start_byte; + loff_t scan_end_byte = min(i_size_read(inode), end_byte); + int error = 0; + + /* + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite whilst we walk the + * cache and perform delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. + */ + filemap_invalidate_lock(inode->i_mapping); + while (start_byte < scan_end_byte) { + loff_t data_end; + + start_byte = mapping_seek_hole_data(inode->i_mapping, + start_byte, scan_end_byte, SEEK_DATA); + /* + * If there is no more data to scan, all that is left is to + * punch out the remaining range. + */ + if (start_byte == -ENXIO || start_byte == scan_end_byte) + break; + if (start_byte < 0) { + error = start_byte; + goto out_unlock; + } + WARN_ON_ONCE(start_byte < punch_start_byte); + WARN_ON_ONCE(start_byte > scan_end_byte); + + /* + * We find the end of this contiguous cached data range by + * seeking from start_byte to the beginning of the next hole. + */ + data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, + scan_end_byte, SEEK_HOLE); + if (data_end < 0) { + error = data_end; + goto out_unlock; + } + WARN_ON_ONCE(data_end <= start_byte); + WARN_ON_ONCE(data_end > scan_end_byte); + + error = iomap_write_delalloc_scan(inode, &punch_start_byte, + start_byte, data_end, punch); + if (error) + goto out_unlock; + + /* The next data search starts at the end of this one. */ + start_byte = data_end; + } + + if (punch_start_byte < end_byte) + error = punch(inode, punch_start_byte, + end_byte - punch_start_byte); +out_unlock: + filemap_invalidate_unlock(inode->i_mapping); + return error; +} + /* * When a short write occurs, the filesystem may need to remove reserved space * that was allocated in ->iomap_begin from it's ->iomap_end method. For @@ -837,8 +996,25 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write); * allocated for this iomap. * * This function uses [start_byte, end_byte) intervals (i.e. open ended) to - * simplify range iterations, but converts them back to {offset,len} tuples for - * the punch callback. + * simplify range iterations. + * + * The punch() callback *must* only punch delalloc extents in the range passed + * to it. It must skip over all other types of extents in the range and leave + * them completely unchanged. It must do this punch atomically with respect to + * other extent modifications. + * + * The punch() callback may be called with a folio locked to prevent writeback + * extent allocation racing at the edge of the range we are currently punching. + * The locked folio may or may not cover the range being punched, so it is not + * safe for the punch() callback to lock folios itself. + * + * Lock order is: + * + * inode->i_rwsem (shared or exclusive) + * inode->i_mapping->invalidate_lock (exclusive) + * folio_lock() + * ->punch + * internal filesystem allocation lock */ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, struct iomap *iomap, loff_t pos, loff_t length, @@ -848,7 +1024,6 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t start_byte; loff_t end_byte; int blocksize = i_blocksize(inode); - int error = 0; if (iomap->type != IOMAP_DELALLOC) return 0; @@ -872,18 +1047,8 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, if (start_byte >= end_byte) return 0; - /* - * Lock the mapping to avoid races with page faults re-instantiating - * folios and dirtying them via ->page_mkwrite between the page cache - * truncation and the delalloc extent removal. Failing to do this can - * leave dirty pages with no space reservation in the cache. - */ - filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(inode, start_byte, end_byte - 1); - error = punch(inode, start_byte, end_byte - start_byte); - filemap_invalidate_unlock(inode->i_mapping); - - return error; + return iomap_write_delalloc_release(inode, start_byte, end_byte, + punch); } EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); -- GitLab From 580f40b4c956f38e83f66ebed4d81bbe4a7d82fb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:54 -0700 Subject: [PATCH 2259/2290] xfs: xfs_bmap_punch_delalloc_range() should take a byte range [ Upstream commit 7348b322332d8602a4133f0b861334ea021b134a ] All the callers of xfs_bmap_punch_delalloc_range() jump through hoops to convert a byte range to filesystem blocks before calling xfs_bmap_punch_delalloc_range(). Instead, pass the byte range to xfs_bmap_punch_delalloc_range() and have it do the conversion to filesystem blocks internally. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 16 ++++++---------- fs/xfs/xfs_bmap_util.c | 10 ++++++---- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_iomap.c | 8 ++------ 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5d1a995b15f83..6aadc5815068e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -114,9 +114,8 @@ xfs_end_ioend( if (unlikely(error)) { if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); - xfs_bmap_punch_delalloc_range(ip, - XFS_B_TO_FSBT(mp, offset), - XFS_B_TO_FSB(mp, size)); + xfs_bmap_punch_delalloc_range(ip, offset, + offset + size); } goto done; } @@ -455,12 +454,8 @@ xfs_discard_folio( struct folio *folio, loff_t pos) { - struct inode *inode = folio->mapping->host; - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(folio->mapping->host); struct xfs_mount *mp = ip->i_mount; - size_t offset = offset_in_folio(folio, pos); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos); - xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset); int error; if (xfs_is_shutdown(mp)) @@ -470,8 +465,9 @@ xfs_discard_folio( "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - i_blocks_per_folio(inode, folio) - pageoff_fsb); + error = xfs_bmap_punch_delalloc_range(ip, pos, + round_up(pos, folio_size(folio))); + if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 04d0c2bff67c4..867645b74d889 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -590,11 +590,13 @@ out_unlock_iolock: int xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, - xfs_fileoff_t start_fsb, - xfs_fileoff_t length) + xfs_off_t start_byte, + xfs_off_t end_byte) { + struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = &ip->i_df; - xfs_fileoff_t end_fsb = start_fsb + length; + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; int error = 0; @@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range( while (got.br_startoff + got.br_blockcount > start_fsb) { del = got; - xfs_trim_extent(&del, start_fsb, length); + xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb); /* * A delete can push the cursor forward. Step back to the diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 24b37d211f1dc..6888078f5c31e 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) #endif /* CONFIG_XFS_RT */ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, - xfs_fileoff_t start_fsb, xfs_fileoff_t length); + xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { __s64 bmv_offset; /* file offset of segment in blocks */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ea96e8a348687..09676ff6940eb 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1126,12 +1126,8 @@ xfs_buffered_write_delalloc_punch( loff_t offset, loff_t length) { - struct xfs_mount *mp = XFS_M(inode->i_sb); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); - - return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, - end_fsb - start_fsb); + return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, + offset + length); } static int -- GitLab From 54a37e5d07478358dcbf6e73b6c7e40e50a6f375 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:55 -0700 Subject: [PATCH 2260/2290] iomap: write iomap validity checks [ Upstream commit d7b64041164ca177170191d2ad775da074ab2926 ] A recent multithreaded write data corruption has been uncovered in the iomap write code. The core of the problem is partial folio writes can be flushed to disk while a new racing write can map it and fill the rest of the page: writeback new write allocate blocks blocks are unwritten submit IO ..... map blocks iomap indicates UNWRITTEN range loop { lock folio copyin data ..... IO completes runs unwritten extent conv blocks are marked written get next folio } Now add memory pressure such that memory reclaim evicts the partially written folio that has already been written to disk. When the new write finally gets to the last partial page of the new write, it does not find it in cache, so it instantiates a new page, sees the iomap is unwritten, and zeros the part of the page that it does not have data from. This overwrites the data on disk that was originally written. The full description of the corruption mechanism can be found here: https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/ To solve this problem, we need to check whether the iomap is still valid after we lock each folio during the write. We have to do it after we lock the page so that we don't end up with state changes occurring while we wait for the folio to be locked. Hence we need a mechanism to be able to check that the cached iomap is still valid (similar to what we already do in buffered writeback), and we need a way for ->begin_write to back out and tell the high level iomap iterator that we need to remap the remaining write range. The iomap needs to grow some storage for the validity cookie that the filesystem provides to travel with the iomap. XFS, in particular, also needs to know some more information about what the iomap maps (attribute extents rather than file data extents) to for the validity cookie to cover all the types of iomaps we might need to validate. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/iomap/buffered-io.c | 29 +++++++++++++++++++++++++++- fs/iomap/iter.c | 19 ++++++++++++++++++- include/linux/iomap.h | 43 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 60bd16f1a23fb..dac1a5c110c0e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -579,7 +579,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, return iomap_read_inline_data(iter, folio); } -static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, +static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, size_t len, struct folio **foliop) { const struct iomap_page_ops *page_ops = iter->iomap.page_ops; @@ -613,6 +613,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM; goto out_no_page; } + + /* + * Now we have a locked folio, before we do anything with it we need to + * check that the iomap we have cached is not stale. The inode extent + * mapping can change due to concurrent IO in flight (e.g. + * IOMAP_UNWRITTEN state can change and memory reclaim could have + * reclaimed a previously partially written page at this index after IO + * completion before this write reaches this file offset) and hence we + * could do the wrong thing here (zero a page range incorrectly or fail + * to zero) and corrupt data. + */ + if (page_ops && page_ops->iomap_valid) { + bool iomap_valid = page_ops->iomap_valid(iter->inode, + &iter->iomap); + if (!iomap_valid) { + iter->iomap.flags |= IOMAP_F_STALE; + status = 0; + goto out_unlock; + } + } + if (pos + len > folio_pos(folio) + folio_size(folio)) len = folio_pos(folio) + folio_size(folio) - pos; @@ -768,6 +789,8 @@ again: status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) break; + if (iter->iomap.flags & IOMAP_F_STALE) + break; page = folio_file_page(folio, pos >> PAGE_SHIFT); if (mapping_writably_mapped(mapping)) @@ -1076,6 +1099,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) return status; + if (iter->iomap.flags & IOMAP_F_STALE) + break; status = iomap_write_end(iter, pos, bytes, bytes, folio); if (WARN_ON_ONCE(status == 0)) @@ -1131,6 +1156,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) status = iomap_write_begin(iter, pos, bytes, &folio); if (status) return status; + if (iter->iomap.flags & IOMAP_F_STALE) + break; offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c index a1c7592d2aded..79a0614eaab77 100644 --- a/fs/iomap/iter.c +++ b/fs/iomap/iter.c @@ -7,12 +7,28 @@ #include #include "trace.h" +/* + * Advance to the next range we need to map. + * + * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully + * processed - it was aborted because the extent the iomap spanned may have been + * changed during the operation. In this case, the iteration behaviour is to + * remap the unprocessed range of the iter, and that means we may need to remap + * even when we've made no progress (i.e. iter->processed = 0). Hence the + * "finished iterating" case needs to distinguish between + * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we + * need to remap the entire remaining range. + */ static inline int iomap_iter_advance(struct iomap_iter *iter) { + bool stale = iter->iomap.flags & IOMAP_F_STALE; + /* handle the previous iteration (if any) */ if (iter->iomap.length) { - if (iter->processed <= 0) + if (iter->processed < 0) return iter->processed; + if (!iter->processed && !stale) + return 0; if (WARN_ON_ONCE(iter->processed > iomap_length(iter))) return -EIO; iter->pos += iter->processed; @@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter) WARN_ON_ONCE(iter->iomap.offset > iter->pos); WARN_ON_ONCE(iter->iomap.length == 0); WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); + WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE); trace_iomap_iter_dstmap(iter->inode, &iter->iomap); if (iter->srcmap.type != IOMAP_HOLE) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0698c4b8ce0e2..0983dfc9a203c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -49,26 +49,35 @@ struct vm_fault; * * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of * buffer heads for this mapping. + * + * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent + * rather than a file data extent. */ -#define IOMAP_F_NEW 0x01 -#define IOMAP_F_DIRTY 0x02 -#define IOMAP_F_SHARED 0x04 -#define IOMAP_F_MERGED 0x08 -#define IOMAP_F_BUFFER_HEAD 0x10 -#define IOMAP_F_ZONE_APPEND 0x20 +#define IOMAP_F_NEW (1U << 0) +#define IOMAP_F_DIRTY (1U << 1) +#define IOMAP_F_SHARED (1U << 2) +#define IOMAP_F_MERGED (1U << 3) +#define IOMAP_F_BUFFER_HEAD (1U << 4) +#define IOMAP_F_ZONE_APPEND (1U << 5) +#define IOMAP_F_XATTR (1U << 6) /* * Flags set by the core iomap code during operations: * * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size * has changed as the result of this write operation. + * + * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file + * range it covers needs to be remapped by the high level before the operation + * can proceed. */ -#define IOMAP_F_SIZE_CHANGED 0x100 +#define IOMAP_F_SIZE_CHANGED (1U << 8) +#define IOMAP_F_STALE (1U << 9) /* * Flags from 0x1000 up are for file system specific usage: */ -#define IOMAP_F_PRIVATE 0x1000 +#define IOMAP_F_PRIVATE (1U << 12) /* @@ -89,6 +98,7 @@ struct iomap { void *inline_data; void *private; /* filesystem private */ const struct iomap_page_ops *page_ops; + u64 validity_cookie; /* used with .iomap_valid() */ }; static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) @@ -128,6 +138,23 @@ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, struct page *page); + + /* + * Check that the cached iomap still maps correctly to the filesystem's + * internal extent map. FS internal extent maps can change while iomap + * is iterating a cached iomap, so this hook allows iomap to detect that + * the iomap needs to be refreshed during a long running write + * operation. + * + * The filesystem can store internal state (e.g. a sequence number) in + * iomap->validity_cookie when the iomap is first mapped to be able to + * detect changes between mapping time and whenever .iomap_valid() is + * called. + * + * This is called with the folio over the specified file position held + * locked by the iomap code. + */ + bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); }; /* -- GitLab From ea67e73129fceffd40b9193da93544c34d81b9c2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:56 -0700 Subject: [PATCH 2261/2290] xfs: use iomap_valid method to detect stale cached iomaps [ Upstream commit 304a68b9c63bbfc1f6e159d68e8892fc54a06067 ] Now that iomap supports a mechanism to validate cached iomaps for buffered write operations, hook it up to the XFS buffered write ops so that we can avoid data corruptions that result from stale cached iomaps. See: https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/ or the ->iomap_valid() introduction commit for exact details of the corruption vector. The validity cookie we store in the iomap is based on the type of iomap we return. It is expected that the iomap->flags we set in xfs_bmbt_to_iomap() is not perturbed by the iomap core and are returned to us in the iomap passed via the .iomap_valid() callback. This ensures that the validity cookie is always checking the correct inode fork sequence numbers to detect potential changes that affect the extent cached by the iomap. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 6 ++- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_iomap.c | 95 +++++++++++++++++++++++++++++++--------- fs/xfs/xfs_iomap.h | 5 ++- fs/xfs/xfs_pnfs.c | 6 ++- 5 files changed, 87 insertions(+), 27 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 49d0d4ea63fcd..56b9b7db38bbd 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc( * the extent. Just return the real extent at this offset. */ if (!isnullstartblock(bma.got.br_startblock)) { - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); goto out_trans_cancel; } @@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc( XFS_STATS_INC(mp, xs_xstrat_quick); ASSERT(!isnullstartblock(bma.got.br_startblock)); - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6aadc5815068e..a22d90af40c85 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -372,7 +372,7 @@ retry: isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 09676ff6940eb..26ca3cc1a0489 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -48,13 +48,45 @@ xfs_alert_fsblock_zero( return -EFSCORRUPTED; } +u64 +xfs_iomap_inode_sequence( + struct xfs_inode *ip, + u16 iomap_flags) +{ + u64 cookie = 0; + + if (iomap_flags & IOMAP_F_XATTR) + return READ_ONCE(ip->i_af.if_seq); + if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp) + cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32; + return cookie | READ_ONCE(ip->i_df.if_seq); +} + +/* + * Check that the iomap passed to us is still valid for the given offset and + * length. + */ +static bool +xfs_iomap_valid( + struct inode *inode, + const struct iomap *iomap) +{ + return iomap->validity_cookie == + xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags); +} + +const struct iomap_page_ops xfs_iomap_page_ops = { + .iomap_valid = xfs_iomap_valid, +}; + int xfs_bmbt_to_iomap( struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags) + u16 iomap_flags, + u64 sequence_cookie) { struct xfs_mount *mp = ip->i_mount; struct xfs_buftarg *target = xfs_inode_buftarg(ip); @@ -91,6 +123,9 @@ xfs_bmbt_to_iomap( if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) iomap->flags |= IOMAP_F_DIRTY; + + iomap->validity_cookie = sequence_cookie; + iomap->page_ops = &xfs_iomap_page_ops; return 0; } @@ -195,7 +230,8 @@ xfs_iomap_write_direct( xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap) + struct xfs_bmbt_irec *imap, + u64 *seq) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -285,6 +321,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: + *seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -743,6 +780,7 @@ xfs_direct_write_iomap_begin( bool shared = false; u16 iomap_flags = 0; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); @@ -811,9 +849,10 @@ xfs_direct_write_iomap_begin( goto out_unlock; } + seq = xfs_iomap_inode_sequence(ip, iomap_flags); xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); allocate_blocks: error = -EAGAIN; @@ -839,24 +878,26 @@ allocate_blocks: xfs_iunlock(ip, lockmode); error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, - flags, &imap); + flags, &imap, &seq); if (error) return error; trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - iomap_flags | IOMAP_F_NEW); + iomap_flags | IOMAP_F_NEW, seq); out_found_cow: - xfs_iunlock(ip, lockmode); length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); if (imap.br_startblock != HOLESTARTBLOCK) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + seq = xfs_iomap_inode_sequence(ip, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; } - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, lockmode); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); out_unlock: if (lockmode) @@ -915,6 +956,7 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; unsigned int lockmode = XFS_ILOCK_EXCL; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1094,26 +1136,31 @@ retry: * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. */ + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); xfs_iunlock(ip, XFS_ILOCK_EXCL); trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); found_imap: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); found_cow: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + seq = xfs_iomap_inode_sequence(ip, 0); if (imap.br_startoff <= offset_fsb) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); } xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1193,6 +1240,7 @@ xfs_read_iomap_begin( int nimaps = 1, error = 0; bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); @@ -1206,13 +1254,14 @@ xfs_read_iomap_begin( &nimaps, 0); if (!error && (flags & IOMAP_REPORT)) error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0); xfs_iunlock(ip, lockmode); if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0); + shared ? IOMAP_F_SHARED : 0, seq); } const struct iomap_ops xfs_read_iomap_ops = { @@ -1237,6 +1286,7 @@ xfs_seek_iomap_begin( struct xfs_bmbt_irec imap, cmap; int error = 0; unsigned lockmode; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1271,8 +1321,9 @@ xfs_seek_iomap_begin( if (data_fsb < cow_fsb + cmap.br_blockcount) end_fsb = min(end_fsb, data_fsb); xfs_trim_extent(&cmap, offset_fsb, end_fsb); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); /* * This is a COW extent, so we must probe the page cache * because there could be dirty page cache being backed @@ -1293,8 +1344,9 @@ xfs_seek_iomap_begin( imap.br_startblock = HOLESTARTBLOCK; imap.br_state = XFS_EXT_NORM; done: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_trim_extent(&imap, offset_fsb, end_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); out_unlock: xfs_iunlock(ip, lockmode); return error; @@ -1320,6 +1372,7 @@ xfs_xattr_iomap_begin( struct xfs_bmbt_irec imap; int nimaps = 1, error = 0; unsigned lockmode; + int seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1336,12 +1389,14 @@ xfs_xattr_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ATTRFORK); out_unlock: + + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR); xfs_iunlock(ip, lockmode); if (error) return error; ASSERT(nimaps); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq); } const struct iomap_ops xfs_xattr_iomap_ops = { diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 0f62ab633040c..4da13440bae9b 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -13,14 +13,15 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap); + struct xfs_bmbt_irec *imap, u64 *sequence); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip, xfs_fileoff_t end_fsb); +u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags); int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags); + u16 iomap_flags, u64 sequence_cookie); int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len, bool *did_zero); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 37a24f0f7cd40..38d23f0e703a8 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -125,6 +125,7 @@ xfs_fs_map_blocks( int nimaps = 1; uint lock_flags; int error = 0; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -176,6 +177,7 @@ xfs_fs_map_blocks( lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); + seq = xfs_iomap_inode_sequence(ip, 0); ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); @@ -189,7 +191,7 @@ xfs_fs_map_blocks( xfs_iunlock(ip, lock_flags); error = xfs_iomap_write_direct(ip, offset_fsb, - end_fsb - offset_fsb, 0, &imap); + end_fsb - offset_fsb, 0, &imap, &seq); if (error) goto out_unlock; @@ -209,7 +211,7 @@ xfs_fs_map_blocks( } xfs_iunlock(ip, XFS_IOLOCK_EXCL); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq); *device_generation = mp->m_generation; return error; out_unlock: -- GitLab From e811fec51c66a0056459daa1ac834aea7d8d98f5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:57 -0700 Subject: [PATCH 2262/2290] xfs: drop write error injection is unfixable, remove it [ Upstream commit 6e8af15ccdc4e138a5b529c1901a0013e1dcaa09 ] With the changes to scan the page cache for dirty data to avoid data corruptions from partial write cleanup racing with other page cache operations, the drop writes error injection no longer works the same way it used to and causes xfs/196 to fail. This is because xfs/196 writes to the file and populates the page cache before it turns on the error injection and starts failing -overwrites-. The result is that the original drop-writes code failed writes only -after- overwriting the data in the cache, followed by invalidates the cached data, then punching out the delalloc extent from under that data. On the surface, this looks fine. The problem is that page cache invalidation *doesn't guarantee that it removes anything from the page cache* and it doesn't change the dirty state of the folio. When block size == page size and we do page aligned IO (as xfs/196 does) everything happens to align perfectly and page cache invalidation removes the single page folios that span the written data. Hence the followup delalloc punch pass does not find cached data over that range and it can punch the extent out. IOWs, xfs/196 "works" for block size == page size with the new code. I say "works", because it actually only works for the case where IO is page aligned, and no data was read from disk before writes occur. Because the moment we actually read data first, the readahead code allocates multipage folios and suddenly the invalidate code goes back to zeroing subfolio ranges without changing dirty state. Hence, with multipage folios in play, block size == page size is functionally identical to block size < page size behaviour, and drop-writes is manifestly broken w.r.t to this case. Invalidation of a subfolio range doesn't result in the folio being removed from the cache, just the range gets zeroed. Hence after we've sequentially walked over a folio that we've dirtied (via write data) and then invalidated, we end up with a dirty folio full of zeroed data. And because the new code skips punching ranges that have dirty folios covering them, we end up leaving the delalloc range intact after failing all the writes. Hence failed writes now end up writing zeroes to disk in the cases where invalidation zeroes folios rather than removing them from cache. This is a fundamental change of behaviour that is needed to avoid the data corruption vectors that exist in the old write fail path, and it renders the drop-writes injection non-functional and unworkable as it stands. As it is, I think the error injection is also now unnecessary, as partial writes that need delalloc extent are going to be a lot more common with stale iomap detection in place. Hence this patch removes the drop-writes error injection completely. xfs/196 can remain for testing kernels that don't have this data corruption fix, but those that do will report: xfs/196 3s ... [not run] XFS error injection drop_writes unknown on this kernel. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_errortag.h | 12 +++++------- fs/xfs/xfs_error.c | 27 ++++++++++++++++++++------- fs/xfs/xfs_iomap.c | 9 --------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 5362908164b0b..580ccbd5aadc2 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -40,13 +40,12 @@ #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 #define XFS_ERRTAG_BMAP_FINISH_ONE 26 #define XFS_ERRTAG_AG_RESV_CRITICAL 27 + /* - * DEBUG mode instrumentation to test and/or trigger delayed allocation - * block killing in the event of failed writes. When enabled, all - * buffered writes are silenty dropped and handled as if they failed. - * All delalloc blocks in the range of the write (including pre-existing - * delalloc blocks!) are tossed as part of the write failure error - * handling sequence. + * Drop-writes support removed because write error handling cannot trash + * pre-existing delalloc extents in any useful way anymore. We retain the + * definition so that we can reject it as an invalid value in + * xfs_errortag_valid(). */ #define XFS_ERRTAG_DROP_WRITES 28 #define XFS_ERRTAG_LOG_BAD_CRC 29 @@ -95,7 +94,6 @@ #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 #define XFS_RANDOM_BMAP_FINISH_ONE 1 #define XFS_RANDOM_AG_RESV_CRITICAL 4 -#define XFS_RANDOM_DROP_WRITES 1 #define XFS_RANDOM_LOG_BAD_CRC 1 #define XFS_RANDOM_LOG_ITEM_PIN 1 #define XFS_RANDOM_BUF_LRU_REF 2 diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c6b2aabd6f187..dea3c0649d2f7 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_REFCOUNT_FINISH_ONE, XFS_RANDOM_BMAP_FINISH_ONE, XFS_RANDOM_AG_RESV_CRITICAL, - XFS_RANDOM_DROP_WRITES, + 0, /* XFS_RANDOM_DROP_WRITES has been removed */ XFS_RANDOM_LOG_BAD_CRC, XFS_RANDOM_LOG_ITEM_PIN, XFS_RANDOM_BUF_LRU_REF, @@ -162,7 +162,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); -XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); @@ -206,7 +205,6 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), - XFS_ERRORTAG_ATTR_LIST(drop_writes), XFS_ERRORTAG_ATTR_LIST(log_bad_crc), XFS_ERRORTAG_ATTR_LIST(log_item_pin), XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), @@ -256,6 +254,19 @@ xfs_errortag_del( kmem_free(mp->m_errortag); } +static bool +xfs_errortag_valid( + unsigned int error_tag) +{ + if (error_tag >= XFS_ERRTAG_MAX) + return false; + + /* Error out removed injection types */ + if (error_tag == XFS_ERRTAG_DROP_WRITES) + return false; + return true; +} + bool xfs_errortag_test( struct xfs_mount *mp, @@ -277,7 +288,9 @@ xfs_errortag_test( if (!mp->m_errortag) return false; - ASSERT(error_tag < XFS_ERRTAG_MAX); + if (!xfs_errortag_valid(error_tag)) + return false; + randfactor = mp->m_errortag[error_tag]; if (!randfactor || prandom_u32_max(randfactor)) return false; @@ -293,7 +306,7 @@ xfs_errortag_get( struct xfs_mount *mp, unsigned int error_tag) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return mp->m_errortag[error_tag]; @@ -305,7 +318,7 @@ xfs_errortag_set( unsigned int error_tag, unsigned int tag_value) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; mp->m_errortag[error_tag] = tag_value; @@ -319,7 +332,7 @@ xfs_errortag_add( { BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return xfs_errortag_set(mp, error_tag, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 26ca3cc1a0489..1bdd7afc10108 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1190,15 +1190,6 @@ xfs_buffered_write_iomap_end( struct xfs_mount *mp = XFS_M(inode->i_sb); int error; - /* - * Behave as if the write failed if drop writes is enabled. Set the NEW - * flag to force delalloc cleanup. - */ - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { - iomap->flags |= IOMAP_F_NEW; - written = 0; - } - error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, length, written, &xfs_buffered_write_delalloc_punch); if (error && !xfs_is_shutdown(mp)) { -- GitLab From e2ae64993ce537d1710b888e51bdf01caae1e9eb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 1 May 2024 11:40:58 -0700 Subject: [PATCH 2263/2290] xfs: fix off-by-one-block in xfs_discard_folio() [ Upstream commit 8ac5b996bf5199f15b7687ceae989f8b2a410dda ] The recent writeback corruption fixes changed the code in xfs_discard_folio() to calculate a byte range to for punching delalloc extents. A mistake was made in using round_up(pos) for the end offset, because when pos points at the first byte of a block, it does not get rounded up to point to the end byte of the block. hence the punch range is short, and this leads to unexpected behaviour in certain cases in xfs_bmap_punch_delalloc_range. e.g. pos = 0 means we call xfs_bmap_punch_delalloc_range(0,0), so there is no previous extent and it rounds up the punch to the end of the delalloc extent it found at offset 0, not the end of the range given to xfs_bmap_punch_delalloc_range(). Fix this by handling the zero block offset case correctly. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=217030 Link: https://lore.kernel.org/linux-xfs/Y+vOfaxIWX1c%2Fyy9@bfoster/ Fixes: 7348b322332d ("xfs: xfs_bmap_punch_delalloc_range() should take a byte range") Reported-by: Pengfei Xu Found-by: Brian Foster Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a22d90af40c85..21c241e96d483 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -439,15 +439,17 @@ xfs_prepare_ioend( } /* - * If the page has delalloc blocks on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip up a later direct I/O read operation on the same region. + * If the folio has delalloc blocks on it, the caller is asking us to punch them + * out. If we don't, we can leave a stale delalloc mapping covered by a clean + * page that needs to be dirtied again before the delalloc mapping can be + * converted. This stale delalloc mapping can trip up a later direct I/O read + * operation on the same region. * - * We prevent this by truncating away the delalloc regions on the page. Because + * We prevent this by truncating away the delalloc regions on the folio. Because * they are delalloc, we can do this without needing a transaction. Indeed - if * we get ENOSPC errors, we have to be able to do this truncation without a - * transaction as there is no space left for block reservation (typically why we - * see a ENOSPC in writeback). + * transaction as there is no space left for block reservation (typically why + * we see a ENOSPC in writeback). */ static void xfs_discard_folio( @@ -465,8 +467,13 @@ xfs_discard_folio( "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); + /* + * The end of the punch range is always the offset of the the first + * byte of the next folio. Hence the end offset is only dependent on the + * folio itself and not the start offset that is passed in. + */ error = xfs_bmap_punch_delalloc_range(ip, pos, - round_up(pos, folio_size(folio))); + folio_pos(folio) + folio_size(folio)); if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); -- GitLab From 77d31f0c70c16bda8962c2cc78f4a805d4fd71d5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:40:59 -0700 Subject: [PATCH 2264/2290] xfs: fix incorrect error-out in xfs_remove [ Upstream commit 2653d53345bda90604f673bb211dd060a5a5c232 ] Clean up resources if resetting the dotdot entry doesn't succeed. Observed through code inspection. Fixes: 5838d0356bb3 ("xfs: reset child dir '..' entry when unlinking child") Signed-off-by: Darrick J. Wong Reviewed-by: Andrey Albershteyn Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index aa303be11576f..d354ea2b74f96 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2479,7 +2479,7 @@ xfs_remove( error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, tp->t_mountp->m_sb.sb_rootino, 0); if (error) - return error; + goto out_trans_cancel; } } else { /* -- GitLab From 42163ff6c6f992a61347a754d58a30c5d55ef2fc Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 1 May 2024 11:41:00 -0700 Subject: [PATCH 2265/2290] xfs: fix sb write verify for lazysbcount [ Upstream commit 59f6ab40fd8735c9a1a15401610a31cc06a0bbd6 ] When lazysbcount is enabled, fsstress and loop mount/unmount test report the following problems: XFS (loop0): SB summary counter sanity check failed XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460, xfs_sb block 0x0 XFS (loop0): Unmount and run xfs_repair XFS (loop0): First 128 bytes of corrupted metadata buffer: 00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(.. 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z 00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... .......... 00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................ 00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................ 00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................ XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply +0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem. XFS (loop0): Please unmount the filesystem and rectify the problem(s) XFS (loop0): log mount/recovery failed: error -117 XFS (loop0): log mount failed This corruption will shutdown the file system and the file system will no longer be mountable. The following script can reproduce the problem, but it may take a long time. #!/bin/bash device=/dev/sda testdir=/mnt/test round=0 function fail() { echo "$*" exit 1 } mkdir -p $testdir while [ $round -lt 10000 ] do echo "******* round $round ********" mkfs.xfs -f $device mount $device $testdir || fail "mount failed!" fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null & sleep 4 killall -w fsstress umount $testdir xfs_repair -e $device > /dev/null if [ $? -eq 2 ];then echo "ERR CODE 2: Dirty log exception during repair." exit 1 fi round=$(($round+1)) done With lazysbcount is enabled, There is no additional lock protection for reading m_ifree and m_icount in xfs_log_sb(), if other cpu modifies the m_ifree, this will make the m_ifree greater than m_icount. For example, consider the following sequence and ifreedelta is postive: CPU0 CPU1 xfs_log_sb xfs_trans_unreserve_and_mod_sb ---------- ------------------------------ percpu_counter_sum(&mp->m_icount) percpu_counter_add_batch(&mp->m_icount, idelta, XFS_ICOUNT_BATCH) percpu_counter_add(&mp->m_ifree, ifreedelta); percpu_counter_sum(&mp->m_ifree) After this, incorrect inode count (sb_ifree > sb_icount) will be writen to the log. In the subsequent writing of sb, incorrect inode count (sb_ifree > sb_icount) will fail to pass the boundary check in xfs_validate_sb_write() that cause the file system shutdown. When lazysbcount is enabled, we don't need to guarantee that Lazy sb counters are completely correct, but we do need to guarantee that sb_ifree <= sb_icount. On the other hand, the constraint that m_ifree <= m_icount must be satisfied any time that there /cannot/ be other threads allocating or freeing inode chunks. If the constraint is violated under these circumstances, sb_i{count,free} (the ondisk superblock inode counters) maybe incorrect and need to be marked sick at unmount, the count will be rebuilt on the next mount. Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks") Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_sb.c | 4 +++- fs/xfs/xfs_mount.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index b6a584e044be0..28c464307817d 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -973,7 +973,9 @@ xfs_log_sb( */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); - mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); + mp->m_sb.sb_ifree = min_t(uint64_t, + percpu_counter_sum(&mp->m_ifree), + mp->m_sb.sb_icount); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8bb3c2e847e1..fb87ffb48f7fe 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -538,6 +538,20 @@ xfs_check_summary_counts( return 0; } +static void +xfs_unmount_check( + struct xfs_mount *mp) +{ + if (xfs_is_shutdown(mp)) + return; + + if (percpu_counter_sum(&mp->m_ifree) > + percpu_counter_sum(&mp->m_icount)) { + xfs_alert(mp, "ifree/icount mismatch at unmount"); + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); + } +} + /* * Flush and reclaim dirty inodes in preparation for unmount. Inodes and * internal inode structures can be sitting in the CIL and AIL at this point, @@ -1077,6 +1091,7 @@ xfs_unmountfs( if (error) xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); + xfs_unmount_check(mp); xfs_log_unmount(mp); xfs_da_unmount(mp); -- GitLab From 781f80e5194152cea89173662af5374436586baa Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 1 May 2024 11:41:01 -0700 Subject: [PATCH 2266/2290] xfs: fix incorrect i_nlink caused by inode racing [ Upstream commit 28b4b0596343d19d140da059eee0e5c2b5328731 ] The following error occurred during the fsstress test: XFS: Assertion failed: VFS_I(ip)->i_nlink >= 2, file: fs/xfs/xfs_inode.c, line: 2452 The problem was that inode race condition causes incorrect i_nlink to be written to disk, and then it is read into memory. Consider the following call graph, inodes that are marked as both XFS_IFLUSHING and XFS_IRECLAIMABLE, i_nlink will be reset to 1 and then restored to original value in xfs_reinit_inode(). Therefore, the i_nlink of directory on disk may be set to 1. xfsaild xfs_inode_item_push xfs_iflush_cluster xfs_iflush xfs_inode_to_disk xfs_iget xfs_iget_cache_hit xfs_iget_recycle xfs_reinit_inode inode_init_always xfs_reinit_inode() needs to hold the ILOCK_EXCL as it is changing internal inode state and can race with other RCU protected inode lookups. On the read side, xfs_iflush_cluster() grabs the ILOCK_SHARED while under rcu + ip->i_flags_lock, and so xfs_iflush/xfs_inode_to_disk() are protected from racing inode updates (during transactions) by that lock. Fixes: ff7bebeb91f8 ("xfs: refactor the inode recycling code") # goes further back than this Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index d884cba1d7072..dd5a664c294f5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -342,6 +342,9 @@ xfs_iget_recycle( trace_xfs_iget_recycle(ip); + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return -EAGAIN; + /* * We need to make it look like the inode is being reclaimed to prevent * the actual reclaim workers from stomping over us while we recycle @@ -355,6 +358,7 @@ xfs_iget_recycle( ASSERT(!rwsem_is_locked(&inode->i_rwsem)); error = xfs_reinit_inode(mp, inode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { /* * Re-initializing the inode failed, and we are in deep @@ -523,6 +527,8 @@ xfs_iget_cache_hit( if (ip->i_flags & XFS_IRECLAIMABLE) { /* Drops i_flags_lock and RCU read lock. */ error = xfs_iget_recycle(pag, ip); + if (error == -EAGAIN) + goto out_skip; if (error) return error; } else { -- GitLab From 5465403341b954c46d646b6eb9a3cf97fac08eb9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:02 -0700 Subject: [PATCH 2267/2290] xfs: invalidate block device page cache during unmount [ Upstream commit 032e160305f6872e590c77f11896fb28365c6d6c ] Every now and then I see fstests failures on aarch64 (64k pages) that trigger on the following sequence: mkfs.xfs $dev mount $dev $mnt touch $mnt/a umount $mnt xfs_db -c 'path /a' -c 'print' $dev 99% of the time this succeeds, but every now and then xfs_db cannot find /a and fails. This turns out to be a race involving udev/blkid, the page cache for the block device, and the xfs_db process. udev is triggered whenever anyone closes a block device or unmounts it. The default udev rules invoke blkid to read the fs super and create symlinks to the bdev under /dev/disk. For this, it uses buffered reads through the page cache. xfs_db also uses buffered reads to examine metadata. There is no coordination between xfs_db and udev, which means that they can run concurrently. Note there is no coordination between the kernel and blkid either. On a system with 64k pages, the page cache can cache the superblock and the root inode (and hence the root dir) with the same 64k page. If udev spawns blkid after the mkfs and the system is busy enough that it is still running when xfs_db starts up, they'll both read from the same page in the pagecache. The unmount writes updated inode metadata to disk directly. The XFS buffer cache does not use the bdev pagecache, nor does it invalidate the pagecache on umount. If the above scenario occurs, the pagecache no longer reflects what's on disk, xfs_db reads the stale metadata, and fails to find /a. Most of the time this succeeds because closing a bdev invalidates the page cache, but when processes race, everyone loses. Fix the problem by invalidating the bdev pagecache after flushing the bdev, so that xfs_db will see up to date metadata. Signed-off-by: Darrick J. Wong Reviewed-by: Gao Xiang Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dde346450952a..54c774af6e1c6 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1945,6 +1945,7 @@ xfs_free_buftarg( list_lru_destroy(&btp->bt_lru); blkdev_issue_flush(btp->bt_bdev); + invalidate_bdev(btp->bt_bdev); fs_put_dax(btp->bt_daxdev, btp->bt_mount); kmem_free(btp); -- GitLab From e62c784a56a66f476bb11f1aabcf2c935b7044bf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:03 -0700 Subject: [PATCH 2268/2290] xfs: attach dquots to inode before reading data/cow fork mappings [ Upstream commit 4c6dbfd2756bd83a0085ed804e2bb7be9cc16bc5 ] I've been running near-continuous integration testing of online fsck, and I've noticed that once a day, one of the ARM VMs will fail the test with out of order records in the data fork. xfs/804 races fsstress with online scrub (aka scan but do not change anything), so I think this might be a bug in the core xfs code. This also only seems to trigger if one runs the test for more than ~6 minutes via TIME_FACTOR=13 or something. https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfstests-dev.git/tree/tests/xfs/804?h=djwong-wtf I added a debugging patch to the kernel to check the data fork extents after taking the ILOCK, before dropping ILOCK, and before and after each bmapping operation. So far I've narrowed it down to the delalloc code inserting a record in the wrong place in the iext tree: xfs_bmap_add_extent_hole_delay, near line 2691: case 0: /* * New allocation is not contiguous with another * delayed allocation. * Insert a new entry. */ oldlen = newlen = 0; xfs_iunlock_check_datafork(ip); <-- ok here xfs_iext_insert(ip, icur, new, state); xfs_iunlock_check_datafork(ip); <-- bad here break; } I recorded the state of the data fork mappings and iext cursor state when a corrupt data fork is detected immediately after the xfs_bmap_add_extent_hole_delay call in xfs_bmapi_reserve_delalloc: ino 0x140bb3 func xfs_bmapi_reserve_delalloc line 4164 data fork: ino 0x140bb3 nr 0x0 nr_real 0x0 offset 0xb9 blockcount 0x1f startblock 0x935de2 state 1 ino 0x140bb3 nr 0x1 nr_real 0x1 offset 0xe6 blockcount 0xa startblock 0xffffffffe0007 state 0 ino 0x140bb3 nr 0x2 nr_real 0x1 offset 0xd8 blockcount 0xe startblock 0x935e01 state 0 Here we see that a delalloc extent was inserted into the wrong position in the iext leaf, same as all the other times. The extra trace data I collected are as follows: ino 0x140bb3 fork 0 oldoff 0xe6 oldlen 0x4 oldprealloc 0x6 isize 0xe6000 ino 0x140bb3 oldgotoff 0xea oldgotstart 0xfffffffffffffffe oldgotcount 0x0 oldgotstate 0 ino 0x140bb3 crapgotoff 0x0 crapgotstart 0x0 crapgotcount 0x0 crapgotstate 0 ino 0x140bb3 freshgotoff 0xd8 freshgotstart 0x935e01 freshgotcount 0xe freshgotstate 0 ino 0x140bb3 nowgotoff 0xe6 nowgotstart 0xffffffffe0007 nowgotcount 0xa nowgotstate 0 ino 0x140bb3 oldicurpos 1 oldleafnr 2 oldleaf 0xfffffc00f0609a00 ino 0x140bb3 crapicurpos 2 crapleafnr 2 crapleaf 0xfffffc00f0609a00 ino 0x140bb3 freshicurpos 1 freshleafnr 2 freshleaf 0xfffffc00f0609a00 ino 0x140bb3 newicurpos 1 newleafnr 3 newleaf 0xfffffc00f0609a00 The first line shows that xfs_bmapi_reserve_delalloc was called with whichfork=XFS_DATA_FORK, off=0xe6, len=0x4, prealloc=6. The second line ("oldgot") shows the contents of @got at the beginning of the call, which are the results of the first iext lookup in xfs_buffered_write_iomap_begin. Line 3 ("crapgot") is the result of duplicating the cursor at the start of the body of xfs_bmapi_reserve_delalloc and performing a fresh lookup at @off. Line 4 ("freshgot") is the result of a new xfs_iext_get_extent right before the call to xfs_bmap_add_extent_hole_delay. Totally garbage. Line 5 ("nowgot") is contents of @got after the xfs_bmap_add_extent_hole_delay call. Line 6 is the contents of @icur at the beginning fo the call. Lines 7-9 are the contents of the iext cursors at the point where the block mappings were sampled. I think @oldgot is a HOLESTARTBLOCK extent because the first lookup didn't find anything, so we filled in imap with "fake hole until the end". At the time of the first lookup, I suspect that there's only one 32-block unwritten extent in the mapping (hence oldicurpos==1) but by the time we get to recording crapgot, crapicurpos==2. Dave then added: Ok, that's much simpler to reason about, and implies the smoke is coming from xfs_buffered_write_iomap_begin() or xfs_bmapi_reserve_delalloc(). I suspect the former - it does a lot of stuff with the ILOCK_EXCL held..... .... including calling xfs_qm_dqattach_locked(). xfs_buffered_write_iomap_begin ILOCK_EXCL look up icur xfs_qm_dqattach_locked xfs_qm_dqattach_one xfs_qm_dqget_inode dquot cache miss xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp); xfs_ilock(ip, XFS_ILOCK_EXCL); .... xfs_bmapi_reserve_delalloc(icur) Yup, that's what is letting the magic smoke out - xfs_qm_dqattach_locked() can cycle the ILOCK. If that happens, we can pass a stale icur to xfs_bmapi_reserve_delalloc() and it all goes downhill from there. Back to Darrick now: So. Fix this by moving the dqattach_locked call up before we take the ILOCK, like all the other callers in that file. Fixes: a526c85c2236 ("xfs: move xfs_file_iomap_begin_delay around") # goes further back than this Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1bdd7afc10108..ab5512c0bcf7a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -968,6 +968,10 @@ xfs_buffered_write_iomap_begin( ASSERT(!XFS_IS_REALTIME_INODE(ip)); + error = xfs_qm_dqattach(ip); + if (error) + return error; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -1071,10 +1075,6 @@ xfs_buffered_write_iomap_begin( allocfork = XFS_COW_FORK; } - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out_unlock; - if (eof && offset + count > XFS_ISIZE(ip)) { /* * Determine the initial size of the preallocation. -- GitLab From 2f1eb71ae86d20a528a8c6ebe929218ad01c2302 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Wed, 1 May 2024 11:41:04 -0700 Subject: [PATCH 2269/2290] xfs: wait iclog complete before tearing down AIL [ Upstream commit 1eb52a6a71981b80f9acbd915acd6a05a5037196 ] Fix uaf in xfs_trans_ail_delete during xlog force shutdown. In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks") changed the order of running callbacks and wait for iclog completion to avoid unmount path untimely destroy AIL. But which seems not enough to ensue this, adding mdelay in `xfs_buf_item_unpin` can prove that. The reproduction is as follows. To ensure destroy AIL safely, we should wait all xlog ioend workers done and sync the AIL. ================================================================== BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0 Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43 CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G W 6.1.0-rc1-00002-gc28266863c4a #137 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: xfs-log/sda xlog_ioend_work Call Trace: dump_stack_lvl+0x4d/0x66 print_report+0x171/0x4a6 kasan_report+0xb3/0x130 xfs_trans_ail_delete+0x240/0x2a0 xfs_buf_item_done+0x7b/0xa0 xfs_buf_ioend+0x1e9/0x11f0 xfs_buf_item_unpin+0x4c8/0x860 xfs_trans_committed_bulk+0x4c2/0x7c0 xlog_cil_committed+0xab6/0xfb0 xlog_cil_process_committed+0x117/0x1e0 xlog_state_shutdown_callbacks+0x208/0x440 xlog_force_shutdown+0x1b3/0x3a0 xlog_ioend_work+0xef/0x1d0 process_one_work+0x6f9/0xf70 worker_thread+0x578/0xf30 kthread+0x28c/0x330 ret_from_fork+0x1f/0x30 Allocated by task 9606: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x7a/0x90 __kmalloc+0x59/0x140 kmem_alloc+0xb2/0x2f0 xfs_trans_ail_init+0x20/0x320 xfs_log_mount+0x37e/0x690 xfs_mountfs+0xe36/0x1b40 xfs_fs_fill_super+0xc5c/0x1a70 get_tree_bdev+0x3c5/0x6c0 vfs_get_tree+0x85/0x250 path_mount+0xec3/0x1830 do_mount+0xef/0x110 __x64_sys_mount+0x150/0x1f0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 9662: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 __kasan_slab_free+0x105/0x1a0 __kmem_cache_free+0x99/0x2d0 kvfree+0x3a/0x40 xfs_log_unmount+0x60/0xf0 xfs_unmountfs+0xf3/0x1d0 xfs_fs_put_super+0x78/0x300 generic_shutdown_super+0x151/0x400 kill_block_super+0x9a/0xe0 deactivate_locked_super+0x82/0xe0 deactivate_super+0x91/0xb0 cleanup_mnt+0x32a/0x4a0 task_work_run+0x15f/0x240 exit_to_user_mode_prepare+0x188/0x190 syscall_exit_to_user_mode+0x12/0x30 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff888023169400 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffff888023169400, ffff888023169480) The buggy address belongs to the physical page: page:ffffea00008c5a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888023168f80 pfn:0x23168 head:ffffea00008c5a00 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) raw: 001fffff80010200 ffffea00006b3988 ffffea0000577a88 ffff88800f842ac0 raw: ffff888023168f80 0000000000150007 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888023169300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888023169380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888023169400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888023169480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888023169500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Disabling lock debugging due to kernel taint Fixes: cd6f79d1fb32 ("xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks") Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f02a0dd522b3d..60b19f6d70776 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -886,6 +886,23 @@ xlog_force_iclog( return xlog_state_release_iclog(iclog->ic_log, iclog, NULL); } +/* + * Cycle all the iclogbuf locks to make sure all log IO completion + * is done before we tear down these buffers. + */ +static void +xlog_wait_iclog_completion(struct xlog *log) +{ + int i; + struct xlog_in_core *iclog = log->l_iclog; + + for (i = 0; i < log->l_iclog_bufs; i++) { + down(&iclog->ic_sema); + up(&iclog->ic_sema); + iclog = iclog->ic_next; + } +} + /* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions @@ -1111,6 +1128,14 @@ xfs_log_unmount( { xfs_log_clean(mp); + /* + * If shutdown has come from iclog IO context, the log + * cleaning will have been skipped and so we need to wait + * for the iclog to complete shutdown processing before we + * tear anything down. + */ + xlog_wait_iclog_completion(mp->m_log); + xfs_buftarg_drain(mp->m_ddev_targp); xfs_trans_ail_destroy(mp); @@ -2113,17 +2138,6 @@ xlog_dealloc_log( xlog_in_core_t *iclog, *next_iclog; int i; - /* - * Cycle all the iclogbuf locks to make sure all log IO completion - * is done before we tear down these buffers. - */ - iclog = log->l_iclog; - for (i = 0; i < log->l_iclog_bufs; i++) { - down(&iclog->ic_sema); - up(&iclog->ic_sema); - iclog = iclog->ic_next; - } - /* * Destroy the CIL after waiting for iclog IO completion because an * iclog EIO error will try to shut down the log, which accesses the -- GitLab From 0d889ae85fcf0cf9bc6e998fca2ec760e08e0b46 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Wed, 1 May 2024 11:41:05 -0700 Subject: [PATCH 2270/2290] xfs: fix super block buf log item UAF during force shutdown [ Upstream commit 575689fc0ffa6c4bb4e72fd18e31a6525a6124e0 ] xfs log io error will trigger xlog shut down, and end_io worker call xlog_state_shutdown_callbacks to unpin and release the buf log item. The race condition is that when there are some thread doing transaction commit and happened not to be intercepted by xlog_is_shutdown, then, these log item will be insert into CIL, when unpin and release these buf log item, UAF will occur. BTW, add delay before `xlog_cil_commit` can increase recurrence probability. The following call graph actually encountered this bad situation. fsstress io end worker kworker/0:1H-216 xlog_ioend_work ->xlog_force_shutdown ->xlog_state_shutdown_callbacks ->xlog_cil_process_committed ->xlog_cil_committed ->xfs_trans_committed_bulk ->xfs_trans_apply_sb_deltas ->li_ops->iop_unpin(lip, 1); ->xfs_trans_getsb ->_xfs_trans_bjoin ->xfs_buf_item_init ->if (bip) { return 0;} //relog ->xlog_cil_commit ->xlog_cil_insert_items //insert into CIL ->xfs_buf_ioend_fail(bp); ->xfs_buf_ioend ->xfs_buf_item_done ->xfs_buf_item_relse ->xfs_buf_item_free when cil push worker gather percpu cil and insert super block buf log item into ctx->log_items then uaf occurs. ================================================================== BUG: KASAN: use-after-free in xlog_cil_push_work+0x1c8f/0x22f0 Write of size 8 at addr ffff88801800f3f0 by task kworker/u4:4/105 CPU: 0 PID: 105 Comm: kworker/u4:4 Tainted: G W 6.1.0-rc1-00001-g274115149b42 #136 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: xfs-cil/sda xlog_cil_push_work Call Trace: dump_stack_lvl+0x4d/0x66 print_report+0x171/0x4a6 kasan_report+0xb3/0x130 xlog_cil_push_work+0x1c8f/0x22f0 process_one_work+0x6f9/0xf70 worker_thread+0x578/0xf30 kthread+0x28c/0x330 ret_from_fork+0x1f/0x30 Allocated by task 2145: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_slab_alloc+0x54/0x60 kmem_cache_alloc+0x14a/0x510 xfs_buf_item_init+0x160/0x6d0 _xfs_trans_bjoin+0x7f/0x2e0 xfs_trans_getsb+0xb6/0x3f0 xfs_trans_apply_sb_deltas+0x1f/0x8c0 __xfs_trans_commit+0xa25/0xe10 xfs_symlink+0xe23/0x1660 xfs_vn_symlink+0x157/0x280 vfs_symlink+0x491/0x790 do_symlinkat+0x128/0x220 __x64_sys_symlink+0x7a/0x90 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 216: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 __kasan_slab_free+0x105/0x1a0 kmem_cache_free+0xb6/0x460 xfs_buf_ioend+0x1e9/0x11f0 xfs_buf_item_unpin+0x3d6/0x840 xfs_trans_committed_bulk+0x4c2/0x7c0 xlog_cil_committed+0xab6/0xfb0 xlog_cil_process_committed+0x117/0x1e0 xlog_state_shutdown_callbacks+0x208/0x440 xlog_force_shutdown+0x1b3/0x3a0 xlog_ioend_work+0xef/0x1d0 process_one_work+0x6f9/0xf70 worker_thread+0x578/0xf30 kthread+0x28c/0x330 ret_from_fork+0x1f/0x30 The buggy address belongs to the object at ffff88801800f388 which belongs to the cache xfs_buf_item of size 272 The buggy address is located 104 bytes inside of 272-byte region [ffff88801800f388, ffff88801800f498) The buggy address belongs to the physical page: page:ffffea0000600380 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88801800f208 pfn:0x1800e head:ffffea0000600380 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff) raw: 001fffff80010200 ffffea0000699788 ffff88801319db50 ffff88800fb50640 raw: ffff88801800f208 000000000015000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88801800f280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801800f300: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88801800f380: fc fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88801800f400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801800f480: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Disabling lock debugging due to kernel taint Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 522d450a94b18..df7322ed73fa9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1018,6 +1018,8 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + if (atomic_read(&bip->bli_refcount)) + return; bp->b_log_item = NULL; xfs_buf_rele(bp); xfs_buf_item_free(bip); -- GitLab From 131a854c092f0963731c4d29d93a2ce6c75bdbc7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:06 -0700 Subject: [PATCH 2271/2290] xfs: hoist refcount record merge predicates [ Upstream commit 9d720a5a658f5135861773f26e927449bef93d61 ] Hoist these multiline conditionals into separate static inline helpers to improve readability and set the stage for corruption fixes that will be introduced in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Xiao Yang Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 16 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 3f34bafe18dd1..4408893333a6c 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -815,11 +815,119 @@ out_error: /* Is this extent valid? */ static inline bool xfs_refc_valid( - struct xfs_refcount_irec *rc) + const struct xfs_refcount_irec *rc) { return rc->rc_startblock != NULLAGBLOCK; } +static inline bool +xfs_refc_want_merge_center( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + bool cleft_is_cright, + enum xfs_refc_adjust_op adjust, + unsigned long long *ulenp) +{ + unsigned long long ulen = left->rc_blockcount; + + /* + * To merge with a center record, both shoulder records must be + * adjacent to the record we want to adjust. This is only true if + * find_left and find_right made all four records valid. + */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || + !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) + return false; + + /* There must only be one record for the entire range. */ + if (!cleft_is_cright) + return false; + + /* The shoulder record refcounts must match the new refcount. */ + if (left->rc_refcount != cleft->rc_refcount + adjust) + return false; + if (right->rc_refcount != cleft->rc_refcount + adjust) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount + right->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + *ulenp = ulen; + return true; +} + +static inline bool +xfs_refc_want_merge_left( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = left->rc_blockcount; + + /* + * For a left merge, the left shoulder record must be adjacent to the + * start of the range. If this is true, find_left made left and cleft + * contain valid contents. + */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) + return false; + + /* Left shoulder record refcount must match the new refcount. */ + if (left->rc_refcount != cleft->rc_refcount + adjust) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + +static inline bool +xfs_refc_want_merge_right( + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = right->rc_blockcount; + + /* + * For a right merge, the right shoulder record must be adjacent to the + * end of the range. If this is true, find_right made cright and right + * contain valid contents. + */ + if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) + return false; + + /* Right shoulder record refcount must match the new refcount. */ + if (right->rc_refcount != cright->rc_refcount + adjust) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cright->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + /* * Try to merge with any extents on the boundaries of the adjustment range. */ @@ -861,23 +969,15 @@ xfs_refcount_merge_extents( (cleft.rc_blockcount == cright.rc_blockcount); /* Try to merge left, cleft, and right. cleft must == cright. */ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + - right.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && - xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && - left.rc_refcount == cleft.rc_refcount + adjust && - right.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, + adjust, &ulen)) { *shape_changed = true; return xfs_refcount_merge_center_extents(cur, &left, &cleft, &right, ulen, aglen); } /* Try to merge left and cleft. */ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && - left.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { *shape_changed = true; error = xfs_refcount_merge_left_extent(cur, &left, &cleft, agbno, aglen); @@ -893,10 +993,7 @@ xfs_refcount_merge_extents( } /* Try to merge cright and right. */ - ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; - if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && - right.rc_refcount == cright.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_right(&cright, &right, adjust)) { *shape_changed = true; return xfs_refcount_merge_right_extent(cur, &right, &cright, aglen); -- GitLab From 537baedb3e8141299cd7e9d6444e71bba87a8376 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:07 -0700 Subject: [PATCH 2272/2290] xfs: estimate post-merge refcounts correctly [ Upstream commit b25d1984aa884fc91a73a5a407b9ac976d441e9b ] Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount metadata corruptions after being run. Specifically, xfs_repair noticed single-block refcount records that could be combined but had not been. The root cause of this is improper MAXREFCOUNT edge case handling in xfs_refcount_merge_extents. When we're trying to find candidates for a refcount btree record merge, we compute the refcount attribute of the merged record, but we fail to account for the fact that once a record hits rc_refcount == MAXREFCOUNT, it is pinned that way forever. Hence the computed refcount is wrong, and we fail to merge the extents. Fix this by adjusting the merge predicates to compute the adjusted refcount correctly. Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Xiao Yang Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_refcount.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 4408893333a6c..6f7ed9288fe40 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -820,6 +820,17 @@ xfs_refc_valid( return rc->rc_startblock != NULLAGBLOCK; } +static inline xfs_nlink_t +xfs_refc_merge_refcount( + const struct xfs_refcount_irec *irec, + enum xfs_refc_adjust_op adjust) +{ + /* Once a record hits MAXREFCOUNT, it is pinned there forever */ + if (irec->rc_refcount == MAXREFCOUNT) + return MAXREFCOUNT; + return irec->rc_refcount + adjust; +} + static inline bool xfs_refc_want_merge_center( const struct xfs_refcount_irec *left, @@ -831,6 +842,7 @@ xfs_refc_want_merge_center( unsigned long long *ulenp) { unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; /* * To merge with a center record, both shoulder records must be @@ -846,9 +858,10 @@ xfs_refc_want_merge_center( return false; /* The shoulder record refcounts must match the new refcount. */ - if (left->rc_refcount != cleft->rc_refcount + adjust) + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) return false; - if (right->rc_refcount != cleft->rc_refcount + adjust) + if (right->rc_refcount != new_refcount) return false; /* @@ -871,6 +884,7 @@ xfs_refc_want_merge_left( enum xfs_refc_adjust_op adjust) { unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; /* * For a left merge, the left shoulder record must be adjacent to the @@ -881,7 +895,8 @@ xfs_refc_want_merge_left( return false; /* Left shoulder record refcount must match the new refcount. */ - if (left->rc_refcount != cleft->rc_refcount + adjust) + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) return false; /* @@ -903,6 +918,7 @@ xfs_refc_want_merge_right( enum xfs_refc_adjust_op adjust) { unsigned long long ulen = right->rc_blockcount; + xfs_nlink_t new_refcount; /* * For a right merge, the right shoulder record must be adjacent to the @@ -913,7 +929,8 @@ xfs_refc_want_merge_right( return false; /* Right shoulder record refcount must match the new refcount. */ - if (right->rc_refcount != cright->rc_refcount + adjust) + new_refcount = xfs_refc_merge_refcount(cright, adjust); + if (right->rc_refcount != new_refcount) return false; /* -- GitLab From 2cc027623e73738a3b7a286327719ab2d65cd4cd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:08 -0700 Subject: [PATCH 2273/2290] xfs: invalidate xfs_bufs when allocating cow extents [ Upstream commit ddfdd530e43fcb3f7a0a69966e5f6c33497b4ae3 ] While investigating test failures in xfs/17[1-3] in alwayscow mode, I noticed through code inspection that xfs_bmap_alloc_userdata isn't setting XFS_ALLOC_USERDATA when allocating extents for a file's CoW fork. COW staging extents should be flagged as USERDATA, since user data are persisted to these blocks before being remapped into a file. This mis-classification has a few impacts on the behavior of the system. First, the filestreams allocator is supposed to keep allocating from a chosen AG until it runs out of space in that AG. However, it only does that for USERDATA allocations, which means that COW allocations aren't tied to the filestreams AG. Fortunately, few people use filestreams, so nobody's noticed. A more serious problem is that xfs_alloc_ag_vextent_small looks for a buffer to invalidate *if* the USERDATA flag is set and the AG is so full that the allocation had to come from the AGFL because the cntbt is empty. The consequences of not invalidating the buffer are severe -- if the AIL incorrectly checkpoints a buffer that is now being used to store user data, that action will clobber the user's written data. Fix filestreams and yet another data corruption vector by flagging COW allocations as USERDATA. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 56b9b7db38bbd..0d56a8d862e80 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata( * the busy list. */ bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK) { + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { bma->datatype |= XFS_ALLOC_USERDATA; if (bma->offset == 0) bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; -- GitLab From 4db0e08ef9aceee6947ebdb387439b5600e7564f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:09 -0700 Subject: [PATCH 2274/2290] xfs: allow inode inactivation during a ro mount log recovery [ Upstream commit 76e589013fec672c3587d6314f2d1f0aeddc26d9 ] In the next patch, we're going to prohibit log recovery if the primary superblock contains an unrecognized rocompat feature bit even on readonly mounts. This requires removing all the code in the log mounting process that temporarily disables the readonly state. Unfortunately, inode inactivation disables itself on readonly mounts. Clearing the iunlinked lists after log recovery needs inactivation to run to free the unreferenced inodes, which (AFAICT) is the only reason why log mounting plays games with the readonly state in the first place. Therefore, change the inactivation predicates to allow inactivation during log recovery of a readonly mount. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d354ea2b74f96..54b707787f907 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1652,8 +1652,11 @@ xfs_inode_needs_inactive( if (VFS_I(ip)->i_mode == 0) return false; - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) + /* + * If this is a read-only mount, don't do this (would generate I/O) + * unless we're in log recovery and cleaning the iunlinked list. + */ + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) return false; /* If the log isn't running, push inodes straight to reclaim. */ @@ -1713,8 +1716,11 @@ xfs_inactive( mp = ip->i_mount; ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) + /* + * If this is a read-only mount, don't do this (would generate I/O) + * unless we're in log recovery and cleaning the iunlinked list. + */ + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) goto out; /* Metadata inodes require explicit resource cleanup. */ -- GitLab From 7430ff84c2e68388a5f4c6a04e93d32d943c374d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 May 2024 11:41:10 -0700 Subject: [PATCH 2275/2290] xfs: fix log recovery when unknown rocompat bits are set [ Upstream commit 74ad4693b6473950e971b3dc525b5ee7570e05d0 ] Log recovery has always run on read only mounts, even where the primary superblock advertises unknown rocompat bits. Due to a misunderstanding between Eric and Darrick back in 2018, we accidentally changed the superblock write verifier to shutdown the fs over that exact scenario. As a result, the log cleaning that occurs at the end of the mounting process fails if there are unknown rocompat bits set. As we now allow writing of the superblock if there are unknown rocompat bits set on a RO mount, we no longer want to turn off RO state to allow log recovery to succeed on a RO mount. Hence we also remove all the (now unnecessary) RO state toggling from the log recovery path. Fixes: 9e037cb7972f ("xfs: check for unknown v5 feature bits in superblock write verifier" Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_sb.c | 3 ++- fs/xfs/xfs_log.c | 17 ----------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 28c464307817d..bf2cca78304eb 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -266,7 +266,8 @@ xfs_validate_sb_write( return -EFSCORRUPTED; } - if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + if (!xfs_is_readonly(mp) && + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Corruption detected in superblock read-only compatible features (0x%x)!", (sbp->sb_features_ro_compat & diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 60b19f6d70776..d9aa5eab02c3f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -730,15 +730,7 @@ xfs_log_mount( * just worked. */ if (!xfs_has_norecovery(mp)) { - /* - * log recovery ignores readonly state and so we need to clear - * mount-based read only state so it can write to disk. - */ - bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, - &mp->m_opstate); error = xlog_recover(log); - if (readonly) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); @@ -787,7 +779,6 @@ xfs_log_mount_finish( struct xfs_mount *mp) { struct xlog *log = mp->m_log; - bool readonly; int error = 0; if (xfs_has_norecovery(mp)) { @@ -795,12 +786,6 @@ xfs_log_mount_finish( return 0; } - /* - * log recovery ignores readonly state and so we need to clear - * mount-based read only state so it can write to disk. - */ - readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - /* * During the second phase of log recovery, we need iget and * iput to behave like they do for an active filesystem. @@ -850,8 +835,6 @@ xfs_log_mount_finish( xfs_buftarg_drain(mp->m_ddev_targp); clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); - if (readonly) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); /* Make sure the log is dead if we're returning failure. */ ASSERT(!error || xlog_is_shutdown(log)); -- GitLab From fbdf080691bb9db8875689620da2e7647b284f2a Mon Sep 17 00:00:00 2001 From: Hironori Shiina Date: Wed, 1 May 2024 11:41:11 -0700 Subject: [PATCH 2276/2290] xfs: get root inode correctly at bulkstat [ Upstream commit 817644fa4525258992f17fecf4f1d6cdd2e1b731 ] The root inode number should be set to `breq->startino` for getting stat information of the root when XFS_BULK_IREQ_SPECIAL_ROOT is used. Otherwise, the inode search is started from 1 (XFS_BULK_IREQ_SPECIAL_ROOT) and the inode with the lowest number in a filesystem is returned. Fixes: bf3cb3944792 ("xfs: allow single bulkstat of special inodes") Signed-off-by: Hironori Shiina Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f783e9796296..85fbb3b71d1c6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; -- GitLab From d9a85a8d82d002371eb2fdbd2c74cfdf491d394d Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 1 May 2024 11:41:12 -0700 Subject: [PATCH 2277/2290] xfs: short circuit xfs_growfs_data_private() if delta is zero [ Upstream commit 84712492e6dab803bf595fb8494d11098b74a652 ] Although xfs_growfs_data() doesn't call xfs_growfs_data_private() if in->newblocks == mp->m_sb.sb_dblocks, xfs_growfs_data_private() further massages the new block count so that we don't i.e. try to create a too-small new AG. This may lead to a delta of "0" in xfs_growfs_data_private(), so we end up in the shrink case and emit the EXPERIMENTAL warning even if we're not changing anything at all. Fix this by returning straightaway if the block delta is zero. (nb: in older kernels, the result of entering the shrink case with delta == 0 may actually let an -ENOSPC escape to userspace, which is confusing for users.) Fixes: fb2fc1720185 ("xfs: support shrinking unused space in the last AG") Signed-off-by: Eric Sandeen Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_fsops.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 13851c0d640bc..332da0d7b85cf 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -129,6 +129,10 @@ xfs_growfs_data_private( if (delta < 0 && nagcount < 2) return -EINVAL; + /* No work to do */ + if (delta == 0) + return 0; + oagcount = mp->m_sb.sb_agcount; /* allocate the new per-ag structures */ if (nagcount > oagcount) { -- GitLab From a94cf7660402c1c3a76e832f72fc3fb5f0fbbb48 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Nov 2022 12:54:24 +0000 Subject: [PATCH 2278/2290] arm64: atomics: lse: remove stale dependency on JUMP_LABEL commit 657eef0a5420a02c02945ed8c87f2ddcbd255772 upstream. Currently CONFIG_ARM64_USE_LSE_ATOMICS depends upon CONFIG_JUMP_LABEL, as the inline atomics were indirected with a static branch. However, since commit: 21fb26bfb01ffe0d ("arm64: alternatives: add alternative_has_feature_*()") ... we use an alternative_branch (which is always available) rather than a static branch, and hence the dependency is unnecessary. Remove the stale dependency, along with the stale include. This will allow the use of LSE atomics in kernels built with CONFIG_JUMP_LABEL=n, and reduces the risk of circular header dependencies via . Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20221114125424.2998268-1-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Oleksandr Tymoshenko Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/lse.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c15f71501c6c2..044b98a62f7bb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1752,7 +1752,6 @@ config ARM64_LSE_ATOMICS config ARM64_USE_LSE_ATOMICS bool "Atomic instructions" - depends on JUMP_LABEL default y help As part of the Large System Extensions, ARMv8.1 introduces new diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index c503db8e73b01..f99d74826a7ef 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -10,7 +10,6 @@ #include #include -#include #include #include #include -- GitLab From 92cb363d16ac1e41c9764cdb513d0e89a6ff4915 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 26 Dec 2023 15:32:19 +0530 Subject: [PATCH 2279/2290] drm/amdgpu: Fix possible NULL dereference in amdgpu_ras_query_error_status_helper() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b8d55a90fd55b767c25687747e2b24abd1ef8680 upstream. Return invalid error code -EINVAL for invalid block id. Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1183 amdgpu_ras_query_error_status_helper() error: we previously assumed 'info' could be null (see line 1176) Suggested-by: Hawking Zhang Cc: Tao Zhou Cc: Hawking Zhang Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher [Ajay: applied AMDGPU_RAS_BLOCK_COUNT condition to amdgpu_ras_query_error_status() as amdgpu_ras_query_error_status_helper() not present in v6.6, v6.1 amdgpu_ras_query_error_status_helper() was introduced in 8cc0f5669eb6] Signed-off-by: Ajay Kaher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9fe2eae88ec17..ee83d282b49a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -974,6 +974,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (!obj) return -EINVAL; + if (!info || info->head.block == AMDGPU_RAS_BLOCK_COUNT) + return -EINVAL; + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { amdgpu_ras_get_ecc_info(adev, &err_data); } else { -- GitLab From e78531e8cacec34f94eb65d1f466eb1e25408cb7 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sun, 21 Apr 2024 17:37:49 +0000 Subject: [PATCH 2280/2290] binder: fix max_thread type inconsistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 42316941335644a98335f209daafa4c122f28983 upstream. The type defined for the BINDER_SET_MAX_THREADS ioctl was changed from size_t to __u32 in order to avoid incompatibility issues between 32 and 64-bit kernels. However, the internal types used to copy from user and store the value were never updated. Use u32 to fix the inconsistency. Fixes: a9350fc859ae ("staging: android: binder: fix BINDER_SET_MAX_THREADS declaration") Reported-by: Arve Hjønnevåg Cc: stable@vger.kernel.org Signed-off-by: Carlos Llamas Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240421173750.3117808-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- drivers/android/binder_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8c2b7c074eca1..46111f8c12e61 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5350,7 +5350,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto err; break; case BINDER_SET_MAX_THREADS: { - int max_threads; + u32 max_threads; if (copy_from_user(&max_threads, ubuf, sizeof(max_threads))) { diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index abe19d88c6ecc..c2c1bb3c1e60b 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -420,7 +420,7 @@ struct binder_proc { struct list_head todo; struct binder_stats stats; struct list_head delivered_death; - int max_threads; + u32 max_threads; int requested_threads; int requested_threads_started; int tmp_ref; -- GitLab From 341eb08dbca9eae05308c442fbfab1813a44c97a Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Thu, 2 May 2024 10:11:03 +0530 Subject: [PATCH 2281/2290] usb: dwc3: Wait unconditionally after issuing EndXfer command commit 1d26ba0944d398f88aaf997bda3544646cf21945 upstream. Currently all controller IP/revisions except DWC3_usb3 >= 310a wait 1ms unconditionally for ENDXFER completion when IOC is not set. This is because DWC_usb3 controller revisions >= 3.10a supports GUCTL2[14: Rst_actbitlater] bit which allows polling CMDACT bit to know whether ENDXFER command is completed. Consider a case where an IN request was queued, and parallelly soft_disconnect was called (due to ffs_epfile_release). This eventually calls stop_active_transfer with IOC cleared, hence send_gadget_ep_cmd() skips waiting for CMDACT cleared during EndXfer. For DWC3 controllers with revisions >= 310a, we don't forcefully wait for 1ms either, and we proceed by unmapping the requests. If ENDXFER didn't complete by this time, it leads to SMMU faults since the controller would still be accessing those requests. Fix this by ensuring ENDXFER completion by adding 1ms delay in __dwc3_stop_active_transfer() unconditionally. Cc: stable@vger.kernel.org Fixes: b353eb6dc285 ("usb: dwc3: gadget: Skip waiting for CMDACT cleared during endxfer") Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240502044103.1066350-1-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 2d7ac92ce9b84..c72c6f8ec2c88 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1708,7 +1708,6 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) */ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { - struct dwc3 *dwc = dep->dwc; struct dwc3_gadget_ep_cmd_params params; u32 cmd; int ret; @@ -1733,8 +1732,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int dep->resource_index = 0; if (!interrupt) { - if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A)) - mdelay(1); + mdelay(1); dep->flags &= ~DWC3_EP_TRANSFER_STARTED; } else if (!ret) { dep->flags |= DWC3_EP_END_TRANSFER_PENDING; -- GitLab From 17466488ae33ff048a51db9e3e6a4f9ba31ae6a4 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Fri, 10 May 2024 11:08:28 +0200 Subject: [PATCH 2282/2290] net: usb: ax88179_178a: fix link status when link is set to down/up commit ecf848eb934b03959918f5269f64c0e52bc23998 upstream. The idea was to keep only one reset at initialization stage in order to reduce the total delay, or the reset from usbnet_probe or the reset from usbnet_open. I have seen that restarting from usbnet_probe is necessary to avoid doing too complex things. But when the link is set to down/up (for example to configure a different mac address) the link is not correctly recovered unless a reset is commanded from usbnet_open. So, detect the initialization stage (first call) to not reset from usbnet_open after the reset from usbnet_probe and after this stage, always reset from usbnet_open too (when the link needs to be rechecked). Apply to all the possible devices, the behavior now is going to be the same. cc: stable@vger.kernel.org # 6.6+ Fixes: 56f78615bcb1 ("net: usb: ax88179_178a: avoid writing the mac address before first reading") Reported-by: Isaac Ganoung Reported-by: Yongqin Liu Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240510090846.328201-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 37 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 21b6c4d94a632..6d31061818e93 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -174,6 +174,7 @@ struct ax88179_data { u32 wol_supported; u32 wolopts; u8 disconnecting; + u8 initialized; }; struct ax88179_int_data { @@ -1673,6 +1674,18 @@ static int ax88179_reset(struct usbnet *dev) return 0; } +static int ax88179_net_reset(struct usbnet *dev) +{ + struct ax88179_data *ax179_data = dev->driver_priv; + + if (ax179_data->initialized) + ax88179_reset(dev); + else + ax179_data->initialized = 1; + + return 0; +} + static int ax88179_stop(struct usbnet *dev) { u16 tmp16; @@ -1692,6 +1705,7 @@ static const struct driver_info ax88179_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1704,6 +1718,7 @@ static const struct driver_info ax88178a_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1716,7 +1731,7 @@ static const struct driver_info cypress_GX3_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1729,7 +1744,7 @@ static const struct driver_info dlink_dub1312_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1742,7 +1757,7 @@ static const struct driver_info sitecom_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1755,7 +1770,7 @@ static const struct driver_info samsung_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1768,7 +1783,7 @@ static const struct driver_info lenovo_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1781,7 +1796,7 @@ static const struct driver_info belkin_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1794,7 +1809,7 @@ static const struct driver_info toshiba_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1807,7 +1822,7 @@ static const struct driver_info mct_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1820,7 +1835,7 @@ static const struct driver_info at_umc2000_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1833,7 +1848,7 @@ static const struct driver_info at_umc200_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1846,7 +1861,7 @@ static const struct driver_info at_umc2000sp_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, -- GitLab From f099b8127d634ac1a163792cf13db4311a072088 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 7 May 2024 16:43:16 +0300 Subject: [PATCH 2283/2290] usb: typec: ucsi: displayport: Fix potential deadlock commit b791a67f68121d69108640d4a3e591d210ffe850 upstream. The function ucsi_displayport_work() does not access the connector, so it also must not acquire the connector lock. This fixes a potential deadlock scenario: ucsi_displayport_work() -> lock(&con->lock) typec_altmode_vdm() dp_altmode_vdm() dp_altmode_work() typec_altmode_enter() ucsi_displayport_enter() -> lock(&con->lock) Reported-by: Mathias Nyman Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240507134316.161999-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 73cd5bf350472..2431febc46151 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -275,8 +275,6 @@ static void ucsi_displayport_work(struct work_struct *work) struct ucsi_dp *dp = container_of(work, struct ucsi_dp, work); int ret; - mutex_lock(&dp->con->lock); - ret = typec_altmode_vdm(dp->alt, dp->header, dp->vdo_data, dp->vdo_size); if (ret) @@ -285,8 +283,6 @@ static void ucsi_displayport_work(struct work_struct *work) dp->vdo_data = NULL; dp->vdo_size = 0; dp->header = 0; - - mutex_unlock(&dp->con->lock); } void ucsi_displayport_remove_partner(struct typec_altmode *alt) -- GitLab From 3f4be9dbef59dbe74f1b5ab34d8992ed558ac755 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 29 Apr 2024 15:35:58 +0200 Subject: [PATCH 2284/2290] usb: typec: tipd: fix event checking for tps6598x commit 409c1cfb5a803f3cf2d17aeaf75c25c4be951b07 upstream. The current interrupt service routine of the tps6598x only reads the first 64 bits of the INT_EVENT1 and INT_EVENT2 registers, which means that any event above that range will be ignored, leaving interrupts unattended. Moreover, those events will not be cleared, and the device will keep the interrupt enabled. This issue has been observed while attempting to load patches, and the 'ReadyForPatch' field (bit 81) of INT_EVENT1 was set. Given that older versions of the tps6598x (1, 2 and 6) provide 8-byte registers, a mechanism based on the upper byte of the version register (0x0F) has been included. The manufacturer has confirmed [1] that this byte is always 0 for older versions, and either 0xF7 (DH parts) or 0xF9 (DK parts) is returned in newer versions (7 and 8). Read the complete INT_EVENT registers to handle all interrupts generated by the device and account for the hardware version to select the register size. Link: https://e2e.ti.com/support/power-management-group/power-management/f/power-management-forum/1346521/tps65987d-register-command-to-distinguish-between-tps6591-2-6-and-tps65987-8 [1] Fixes: 0a4c005bd171 ("usb: typec: driver for TI TPS6598x USB Power Delivery controllers") Cc: stable@vger.kernel.org Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240429-tps6598x_fix_event_handling-v3-2-4e8e58dce489@wolfvision.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 45 ++++++++++++++++++++++--------- drivers/usb/typec/tipd/tps6598x.h | 11 ++++++++ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 195c9c16f817f..e804db927d5cf 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -24,6 +24,7 @@ #define TPS_REG_MODE 0x03 #define TPS_REG_CMD1 0x08 #define TPS_REG_DATA1 0x09 +#define TPS_REG_VERSION 0x0F #define TPS_REG_INT_EVENT1 0x14 #define TPS_REG_INT_EVENT2 0x15 #define TPS_REG_INT_MASK1 0x16 @@ -518,49 +519,67 @@ err_unlock: static irqreturn_t tps6598x_interrupt(int irq, void *data) { + int intev_len = TPS_65981_2_6_INTEVENT_LEN; struct tps6598x *tps = data; - u64 event1 = 0; - u64 event2 = 0; + u64 event1[2] = { }; + u64 event2[2] = { }; + u32 version; u32 status; int ret; mutex_lock(&tps->lock); - ret = tps6598x_read64(tps, TPS_REG_INT_EVENT1, &event1); - ret |= tps6598x_read64(tps, TPS_REG_INT_EVENT2, &event2); + ret = tps6598x_read32(tps, TPS_REG_VERSION, &version); + if (ret) + dev_warn(tps->dev, "%s: failed to read version (%d)\n", + __func__, ret); + + if (TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DH || + TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DK) + intev_len = TPS_65987_8_INTEVENT_LEN; + + ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len); + + ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len); if (ret) { - dev_err(tps->dev, "%s: failed to read events\n", __func__); + dev_err(tps->dev, "%s: failed to read event1\n", __func__); goto err_unlock; } - trace_tps6598x_irq(event1, event2); + ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT2, event2, intev_len); + if (ret) { + dev_err(tps->dev, "%s: failed to read event2\n", __func__); + goto err_unlock; + } + trace_tps6598x_irq(event1[0], event2[0]); - if (!(event1 | event2)) + if (!(event1[0] | event1[1] | event2[0] | event2[1])) goto err_unlock; if (!tps6598x_read_status(tps, &status)) goto err_clear_ints; - if ((event1 | event2) & TPS_REG_INT_POWER_STATUS_UPDATE) + if ((event1[0] | event2[0]) & TPS_REG_INT_POWER_STATUS_UPDATE) if (!tps6598x_read_power_status(tps)) goto err_clear_ints; - if ((event1 | event2) & TPS_REG_INT_DATA_STATUS_UPDATE) + if ((event1[0] | event2[0]) & TPS_REG_INT_DATA_STATUS_UPDATE) if (!tps6598x_read_data_status(tps)) goto err_clear_ints; /* Handle plug insert or removal */ - if ((event1 | event2) & TPS_REG_INT_PLUG_EVENT) + if ((event1[0] | event2[0]) & TPS_REG_INT_PLUG_EVENT) tps6598x_handle_plug_event(tps, status); err_clear_ints: - tps6598x_write64(tps, TPS_REG_INT_CLEAR1, event1); - tps6598x_write64(tps, TPS_REG_INT_CLEAR2, event2); + tps6598x_block_write(tps, TPS_REG_INT_CLEAR1, event1, intev_len); + tps6598x_block_write(tps, TPS_REG_INT_CLEAR2, event2, intev_len); err_unlock: mutex_unlock(&tps->lock); - if (event1 | event2) + if (event1[0] | event1[1] | event2[0] | event2[1]) return IRQ_HANDLED; + return IRQ_NONE; } diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h index 527857549d699..1fc3cc8ad199a 100644 --- a/drivers/usb/typec/tipd/tps6598x.h +++ b/drivers/usb/typec/tipd/tps6598x.h @@ -199,4 +199,15 @@ #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_A BIT(2) #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_B (BIT(2) | BIT(1)) +/* Version Register */ +#define TPS_VERSION_HW_VERSION_MASK GENMASK(31, 24) +#define TPS_VERSION_HW_VERSION(x) TPS_FIELD_GET(TPS_VERSION_HW_VERSION_MASK, (x)) +#define TPS_VERSION_HW_65981_2_6 0x00 +#define TPS_VERSION_HW_65987_8_DH 0xF7 +#define TPS_VERSION_HW_65987_8_DK 0xF9 + +/* Int Event Register length */ +#define TPS_65981_2_6_INTEVENT_LEN 8 +#define TPS_65987_8_INTEVENT_LEN 11 + #endif /* __TPS6598X_H__ */ -- GitLab From a6b9c5de4a31502083c7967f47ec21c2c580973c Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 24 Apr 2024 15:21:41 +0100 Subject: [PATCH 2285/2290] serial: kgdboc: Fix NMI-safety problems from keyboard reset code commit b2aba15ad6f908d1a620fd97f6af5620c3639742 upstream. Currently, when kdb is compiled with keyboard support, then we will use schedule_work() to provoke reset of the keyboard status. Unfortunately schedule_work() gets called from the kgdboc post-debug-exception handler. That risks deadlock since schedule_work() is not NMI-safe and, even on platforms where the NMI is not directly used for debugging, the debug trap can have NMI-like behaviour depending on where breakpoints are placed. Fix this by using the irq work system, which is NMI-safe, to defer the call to schedule_work() to a point when it is safe to call. Reported-by: Liuye Closes: https://lore.kernel.org/all/20240228025602.3087748-1-liu.yeC@h3c.com/ Cc: stable@vger.kernel.org Reviewed-by: Douglas Anderson Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240424-kgdboc_fix_schedule_work-v2-1-50f5a490aec5@linaro.org Signed-off-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 7aa37be3216a5..86433e3c3409a 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,25 @@ static struct kgdb_io kgdboc_earlycon_io_ops; static int (*earlycon_orig_exit)(struct console *con); #endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */ +/* + * When we leave the debug trap handler we need to reset the keyboard status + * (since the original keyboard state gets partially clobbered by kdb use of + * the keyboard). + * + * The path to deliver the reset is somewhat circuitous. + * + * To deliver the reset we register an input handler, reset the keyboard and + * then deregister the input handler. However, to get this done right, we do + * have to carefully manage the calling context because we can only register + * input handlers from task context. + * + * In particular we need to trigger the action from the debug trap handler with + * all its NMI and/or NMI-like oddities. To solve this the kgdboc trap exit code + * (the "post_exception" callback) uses irq_work_queue(), which is NMI-safe, to + * schedule a callback from a hardirq context. From there we have to defer the + * work again, this time using schedule_work(), to get a callback using the + * system workqueue, which runs in task context. + */ #ifdef CONFIG_KDB_KEYBOARD static int kgdboc_reset_connect(struct input_handler *handler, struct input_dev *dev, @@ -99,10 +119,17 @@ static void kgdboc_restore_input_helper(struct work_struct *dummy) static DECLARE_WORK(kgdboc_restore_input_work, kgdboc_restore_input_helper); +static void kgdboc_queue_restore_input_helper(struct irq_work *unused) +{ + schedule_work(&kgdboc_restore_input_work); +} + +static DEFINE_IRQ_WORK(kgdboc_restore_input_irq_work, kgdboc_queue_restore_input_helper); + static void kgdboc_restore_input(void) { if (likely(system_state == SYSTEM_RUNNING)) - schedule_work(&kgdboc_restore_input_work); + irq_work_queue(&kgdboc_restore_input_irq_work); } static int kgdboc_register_kbd(char **cptr) @@ -133,6 +160,7 @@ static void kgdboc_unregister_kbd(void) i--; } } + irq_work_sync(&kgdboc_restore_input_irq_work); flush_work(&kgdboc_restore_input_work); } #else /* ! CONFIG_KDB_KEYBOARD */ -- GitLab From 1d9e2de24533daca36cbf09e8d8596bf72b526b2 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 21 Mar 2024 09:46:13 +0100 Subject: [PATCH 2286/2290] remoteproc: mediatek: Make sure IPI buffer fits in L2TCM commit 331f91d86f71d0bb89a44217cc0b2a22810bbd42 upstream. The IPI buffer location is read from the firmware that we load to the System Companion Processor, and it's not granted that both the SRAM (L2TCM) size that is defined in the devicetree node is large enough for that, and while this is especially true for multi-core SCP, it's still useful to check on single-core variants as well. Failing to perform this check may make this driver perform R/W operations out of the L2TCM boundary, resulting (at best) in a kernel panic. To fix that, check that the IPI buffer fits, otherwise return a failure and refuse to boot the relevant SCP core (or the SCP at all, if this is single core). Fixes: 3efa0ea743b7 ("remoteproc/mediatek: read IPI buffer offset from FW") Signed-off-by: AngeloGioacchino Del Regno Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240321084614.45253-2-angelogioacchino.delregno@collabora.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/mtk_scp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index d421a2ccaa1ea..ffec5299b5c1d 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -126,7 +126,7 @@ static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp, static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) { int ret; - size_t offset; + size_t buf_sz, offset; /* read the ipi buf addr from FW itself first */ ret = scp_elf_read_ipi_buf_addr(scp, fw, &offset); @@ -138,6 +138,14 @@ static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) } dev_info(scp->dev, "IPI buf addr %#010zx\n", offset); + /* Make sure IPI buffer fits in the L2TCM range assigned to this core */ + buf_sz = sizeof(*scp->recv_buf) + sizeof(*scp->send_buf); + + if (scp->sram_size < buf_sz + offset) { + dev_err(scp->dev, "IPI buffer does not fit in SRAM.\n"); + return -EOVERFLOW; + } + scp->recv_buf = (struct mtk_share_obj __iomem *) (scp->sram_base + offset); scp->send_buf = (struct mtk_share_obj __iomem *) -- GitLab From 681935009fec3fc22af97ee312d4a24ccf3cf087 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 13 May 2024 21:19:04 +0300 Subject: [PATCH 2287/2290] KEYS: trusted: Do not use WARN when encode fails commit 050bf3c793a07f96bd1e2fd62e1447f731ed733b upstream. When asn1_encode_sequence() fails, WARN is not the correct solution. 1. asn1_encode_sequence() is not an internal function (located in lib/asn1_encode.c). 2. Location is known, which makes the stack trace useless. 3. Results a crash if panic_on_warn is set. It is also noteworthy that the use of WARN is undocumented, and it should be avoided unless there is a carefully considered rationale to use it. Replace WARN with pr_err, and print the return value instead, which is only useful piece of information. Cc: stable@vger.kernel.org # v5.13+ Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs") Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/trusted-keys/trusted_tpm2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index a5feee8c11af9..ea277c55a38db 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -84,8 +84,9 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, work1 = payload->blob; work1 = asn1_encode_sequence(work1, work1 + sizeof(payload->blob), scratch, work - scratch); - if (WARN(IS_ERR(work1), "BUG: ASN.1 encoder failed")) { + if (IS_ERR(work1)) { ret = PTR_ERR(work1); + pr_err("BUG: ASN.1 encoder failed with %d\n", ret); goto err; } -- GitLab From cd82e9620e23244c40037a724318f75aa9e23aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 23 Apr 2024 12:34:25 +0200 Subject: [PATCH 2288/2290] admin-guide/hw-vuln/core-scheduling: fix return type of PR_SCHED_CORE_GET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8af2d1ab78f2342f8c4c3740ca02d86f0ebfac5a upstream. sched_core_share_pid() copies the cookie to userspace with put_user(id, (u64 __user *)uaddr), expecting 64 bits of space. The "unsigned long" datatype that is documented in core-scheduling.rst however is only 32 bits large on 32 bit architectures. Document "unsigned long long" as the correct data type that is always 64bits large. This matches what the selftest cs_prctl_test.c has been doing all along. Fixes: 0159bb020ca9 ("Documentation: Add usecases, design and interface for core scheduling") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/util-linux/df7a25a0-7923-4f8b-a527-5e6f0064074d@t-8ch.de/ Signed-off-by: Thomas Weißschuh Reviewed-by: Chris Hyser Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240423-core-scheduling-cookie-v1-1-5753a35f8dfc@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/core-scheduling.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst index cf1eeefdfc32f..a92e10ec402e7 100644 --- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst +++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst @@ -67,8 +67,8 @@ arg4: will be performed for all tasks in the task group of ``pid``. arg5: - userspace pointer to an unsigned long for storing the cookie returned by - ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. + userspace pointer to an unsigned long long for storing the cookie returned + by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. In order for a process to push a cookie to, or pull a cookie from a process, it is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the -- GitLab From b1c74dad43f9a9783dec5f4a07f83e78ca367e60 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 1 May 2024 12:16:11 +0900 Subject: [PATCH 2289/2290] docs: kernel_include.py: Cope with docutils 0.21 commit d43ddd5c91802a46354fa4c4381416ef760676e2 upstream. Running "make htmldocs" on a newly installed Sphinx 7.3.7 ends up in a build error: Sphinx parallel build error: AttributeError: module 'docutils.nodes' has no attribute 'reprunicode' docutils 0.21 has removed nodes.reprunicode, quote from release note [1]: * Removed objects: docutils.nodes.reprunicode, docutils.nodes.ensure_str() Python 2 compatibility hacks Sphinx 7.3.0 supports docutils 0.21 [2]: kernel_include.py, whose origin is misc.py of docutils, uses reprunicode. Upstream docutils removed the offending line from the corresponding file (docutils/docutils/parsers/rst/directives/misc.py) in January 2022. Quoting the changelog [3]: Deprecate `nodes.reprunicode` and `nodes.ensure_str()`. Drop uses of the deprecated constructs (not required with Python 3). Do the same for kernel_include.py. Tested against: - Sphinx 2.4.5 (docutils 0.17.1) - Sphinx 3.4.3 (docutils 0.17.1) - Sphinx 5.3.0 (docutils 0.18.1) - Sphinx 6.2.1 (docutils 0.19) - Sphinx 7.2.6 (docutils 0.20.1) - Sphinx 7.3.7 (docutils 0.21.2) Link: http://www.docutils.org/RELEASE-NOTES.html#release-0-21-2024-04-09 [1] Link: https://www.sphinx-doc.org/en/master/changes.html#release-7-3-0-released-apr-16-2024 [2] Link: https://github.com/docutils/docutils/commit/c8471ce47a24 [3] Signed-off-by: Akira Yokosawa Cc: stable@vger.kernel.org Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/faf5fa45-2a9d-4573-9d2e-3930bdc1ed65@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/sphinx/kernel_include.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index abe7680883771..6387624423363 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,7 +97,6 @@ class KernelInclude(Include): # HINT: this is the only line I had to change / commented out: #path = utils.relative_path(None, path) - path = nodes.reprunicode(path) encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) e_handler=self.state.document.settings.input_encoding_error_handler -- GitLab From 88690811da69826fdb59d908a6e5e9d0c63b581a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 25 May 2024 16:21:36 +0200 Subject: [PATCH 2290/2290] Linux 6.1.92 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240523130332.496202557@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Mark Brown Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Jon Hunter Tested-by: Salvatore Bonaccorso Tested-by: Mateusz Jończyk Tested-by: Ron Economos Tested-by: Kelsey Steele Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a7d90996e4125..0be668057cb2a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 91 +SUBLEVEL = 92 EXTRAVERSION = NAME = Curry Ramen -- GitLab